diff --git a/pkg/epp/options/cli_flags.go b/pkg/epp/options/cli_flags.go
new file mode 100644
index 000000000..98471cf10
--- /dev/null
+++ b/pkg/epp/options/cli_flags.go
@@ -0,0 +1,209 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package options

import (
	"time"

	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

// Command-line flag definitions for the Endpoint Picker (EPP). Each entry only
// describes a flag (name, default value, help text); nothing in this file
// registers it with a flag.FlagSet. The Go type of DefValue is what selects the
// kind of flag created when a definition is passed to AddFlags.
var (
	//
	// ext_proc configuration
	//
	GRPCPort = Flag{
		Name:     "grpc-port",
		DefValue: 9002,
		Usage:    "gRPC port used for communicating with Envoy proxy.",
	}
	EnableLeaderElection = Flag{
		Name:     "ha-enable-leader-election",
		DefValue: false,
		Usage:    "Enables leader election for high availability. When enabled, readiness probes will only pass on the leader.",
	}

	//
	// InferencePool
	//
	PoolGroup = Flag{
		Name:     "pool-group",
		DefValue: "inference.networking.k8s.io",
		Usage:    "Kubernetes resource group of the InferencePool this Endpoint Picker is associated with.",
	}
	PoolNamespace = Flag{
		Name:     "pool-namespace",
		DefValue: "default",
		Usage:    "Namespace of the InferencePool this Endpoint Picker is associated with.",
	}
	PoolName = Flag{
		Name:     "pool-name",
		DefValue: "",
		Usage:    "Name of the InferencePool this Endpoint Picker is associated with.",
	}

	//
	// Endpoints (in lieu of using an InferencePool)
	//
	EndpointSelector = Flag{
		Name:     "endpoint-selector",
		DefValue: "",
		Usage: "Selector to filter model server pods on, only 'key=value' pairs are supported. " +
			"Format: a comma-separated list of key=value pairs without whitespace (e.g., 'app=vllm-llama3-8b-instruct,env=prod').",
	}
	EndpointTargetPorts = Flag{
		Name:     "endpoint-target-ports",
		DefValue: "",
		Usage: "Target ports of model server pods. " +
			"Format: a comma-separated list of numbers without whitespace (e.g., '3000,3001,3002').",
	}

	//
	// MSP metrics scraping
	//
	ModelServerMetricsScheme = Flag{
		Name:     "model-server-metrics-scheme",
		DefValue: "http",
		Usage:    "Protocol scheme used in scraping metrics from endpoints.",
	}
	ModelServerMetricsPath = Flag{
		Name:     "model-server-metrics-path",
		DefValue: "/metrics",
		Usage:    "URL path used in scraping metrics from endpoints.",
	}
	// Marked deprecated: AddFlags wraps deprecated flags so that a warning is
	// written the first time the flag is successfully set.
	ModelServerMetricsPort = Flag{
		Name:       "model-server-metrics-port",
		DefValue:   0,
		Usage:      "Port to scrape metrics from endpoints. Set to the InferencePool.Spec.TargetPorts[0].Number if not defined.",
		Deprecated: true, // no replacement, to be removed
	}
	// The default is true, i.e. per the usage text certificate verification is
	// disabled unless this flag is explicitly set to false.
	ModelServerMetricsHTTPSInsecure = Flag{
		Name:     "model-server-metrics-https-insecure-skip-verify",
		DefValue: true,
		Usage:    "Disable certificate verification when using 'https' scheme for 'model-server-metrics-scheme'.",
	}
	RefreshMetricsInterval = Flag{
		Name:     "refresh-metrics-interval",
		DefValue: 50 * time.Millisecond,
		Usage:    "Interval to refresh metrics.",
	}
	RefreshPrometheusMetricsInterval = Flag{
		Name:     "refresh-prometheus-metrics-interval",
		DefValue: 5 * time.Second,
		Usage:    "Interval to flush Prometheus metrics.",
	}
	MetricsStalenessThreshold = Flag{
		Name:     "metrics-staleness-threshold",
		DefValue: 2 * time.Second,
		Usage:    "Duration after which metrics are considered stale. This is used to determine if an endpoint's metrics are fresh enough.",
	}
	TotalQueuedRequestsMetric = Flag{
		Name:     "total-queued-requests-metric",
		DefValue: "vllm:num_requests_waiting",
		Usage:    "Prometheus metric for the number of queued requests.",
	}
	TotalRunningRequestsMetric = Flag{
		Name:     "total-running-requests-metric",
		DefValue: "vllm:num_requests_running",
		Usage:    "Prometheus metric for the number of running requests.",
	}
	KVCacheUsagePercentageMetric = Flag{
		Name:     "kv-cache-usage-percentage-metric",
		DefValue: "vllm:kv_cache_usage_perc",
		Usage:    "Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1).",
	}
	LoRAInfoMetric = Flag{
		Name:     "lora-info-metric",
		DefValue: "vllm:lora_requests_info",
		Usage:    "Prometheus metric for the LoRA info metrics (must be in vLLM label format).",
	}
	CacheInfoMetric = Flag{
		Name:     "cache-info-metric",
		DefValue: "vllm:cache_config_info",
		Usage:    "Prometheus metric for the cache info metrics.",
	}

	//
	// Diagnostics
	//
	// NOTE(review): logging.DEFAULT is presumably an int verbosity level. AddFlags
	// only accepts string, int, bool and time.Duration defaults, so a differently
	// typed constant would be rejected at registration time; confirm.
	LogVerbosity = Flag{
		Name:     "v",
		DefValue: logging.DEFAULT,
		Usage:    "Number for the log level verbosity.",
	}
	Tracing = Flag{
		Name:     "tracing",
		DefValue: true,
		Usage:    "Enables emitting traces.",
	}
	HealthChecking = Flag{
		Name:     "health-checking",
		DefValue: false,
		Usage:    "Enables health checking.",
	}
	MetricsPort = Flag{
		Name:     "metrics-port",
		DefValue: 9090,
		Usage:    "The metrics port exposed by EPP.",
	}
	GRPCHealthPort = Flag{
		Name:     "grpc-health-port",
		DefValue: 9003,
		Usage:    "The port used for gRPC liveness and readiness probes.",
	}
	EnablePprof = Flag{
		Name:     "enable-pprof",
		DefValue: true,
		Usage:    "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.",
	}
	CertPath = Flag{
		Name:     "cert-path",
		DefValue: "",
		Usage: "The path to the certificate for secure serving. The certificate and private key files " +
			"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, " +
			"then a self-signed certificate is used.",
	}
	EnableCertReload = Flag{
		Name:     "enable-cert-reload",
		DefValue: false,
		Usage:    "Enables certificate reloading of the certificates specified in --cert-path.",
	}
	SecureServing = Flag{
		Name:     "secure-serving",
		DefValue: true,
		Usage:    "Enables secure serving.",
	}
	MetricsEndpointAuth = Flag{
		Name:     "metrics-endpoint-auth",
		DefValue: true,
		Usage:    "Enables authentication and authorization of the metrics endpoint.",
	}

	//
	// Configuration
	//
	ConfigFile = Flag{
		Name:     "config-file",
		DefValue: "",
		Usage:    "The path to the configuration file.",
	}
	ConfigText = Flag{
		Name:     "config-text",
		DefValue: "",
		Usage:    "The configuration specified as text, in lieu of a file.",
	}
)
diff --git a/pkg/epp/options/options.go b/pkg/epp/options/options.go
new file mode 100644
index 000000000..3688ae57f
--- /dev/null
+++ b/pkg/epp/options/options.go
@@ -0,0 +1,181 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package options

import (
	"flag"
	"fmt"
	"io"
	"time"
)

// Flag defines parameters needed to manage command line flags.
//
// AddFlags picks the flag kind from the dynamic type of DefValue; only string,
// int, bool and time.Duration defaults are accepted there.
type Flag struct {
	Name       string // CLI flag name; also the key AddFlags uses to find the destination pointer.
	DefValue   any    // default value, required (to ensure Flag value type is defined).
	Usage      string // help text.
	Deprecated bool   // optional mark as deprecated; AddFlags then wraps the value to emit a warning.
	ReplacedBy string // optional replacement message, printed as "use <ReplacedBy> instead" in that warning.
}

// AddFlags registers a list of Flag definitions with a FlagSet (defaulting to
// flag.CommandLine if unspecified), binding them to the pointer variables provided
// in the vars map.
+func AddFlags(fs *flag.FlagSet, flags []Flag, vars map[string]any) error { + if len(flags) != len(vars) { + return fmt.Errorf("mismatch flags (%d) and vars (%d) count", len(flags), len(vars)) + } + + if fs == nil { + fs = flag.CommandLine + } + + for _, f := range flags { + if f.DefValue == nil { // a default value is required to determine types + return fmt.Errorf("flag %q must have a non-nil default value", f.Name) + } + + ptr, ok := vars[f.Name] + if !ok { // no destination variable + return fmt.Errorf("variable pointer for flag %q not provided", f.Name) + } + + switch def := f.DefValue.(type) { + case string: + p, ok := ptr.(*string) + if !ok { + return typeError(f.Name, ptr, "string") + } + *p = def + fs.StringVar(p, f.Name, def, f.Usage) + case int: + p, ok := ptr.(*int) + if !ok { + return typeError(f.Name, ptr, "int") + } + *p = def + fs.IntVar(p, f.Name, def, f.Usage) + case bool: + p, ok := ptr.(*bool) + if !ok { + return typeError(f.Name, ptr, "bool") + } + *p = def + fs.BoolVar(p, f.Name, def, f.Usage) + case time.Duration: + p, ok := ptr.(*time.Duration) + if !ok { + return typeError(f.Name, ptr, "time.Duration") + } + *p = def + fs.DurationVar(p, f.Name, def, f.Usage) + default: + return fmt.Errorf("unsupported flag type for %q: %T", f.Name, def) + } + + if f.Deprecated { // wrap the value with deprecation warning + fl := fs.Lookup(f.Name) + if fl == nil { + return fmt.Errorf("failed to lookup deprecated flag %q in set", f.Name) + } + fl.Value = &deprecatedValue{ + Value: fl.Value, + name: f.Name, + writer: fs.Output(), + replacedBy: f.ReplacedBy, + } + } + } + + return nil +} + +// deprecatedValue wraps a standard flag.Value to inject a warning message +// when the deprecated flag is used. +type deprecatedValue struct { + flag.Value + warned bool + name string + replacedBy string + writer io.Writer +} + +// Set is called when the flag is parsed from the command line. 
+func (d *deprecatedValue) Set(s string) error { + err := d.Value.Set(s) // delegate to the flag.Value + + if err == nil && !d.warned { + d.warned = true + if d.replacedBy != "" { + fmt.Fprintf(d.writer, "Warning: --%s is deprecated; use %s instead.\n", d.name, d.replacedBy) + } else { + fmt.Fprintf(d.writer, "Warning: --%s is deprecated and will be removed in an upcoming release.\n", d.name) + } + } + return err +} + +// typeError creates a clear error message for flag type mismatches. +func typeError(name string, got any, expected string) error { + return fmt.Errorf("flag %q: variable must be *%s, got %T", name, expected, got) +} + +// GetStringFlagValue retrieves the current value (default or set) of a string flag +// by name from the specified FlagSet (or flag.CommandLine if nil). +func GetStringFlagValue(fs *flag.FlagSet, name string) (string, error) { + if fs == nil { + fs = flag.CommandLine + } + + f := fs.Lookup(name) + if f == nil { + return "", fmt.Errorf("flag not found: %s", name) + } + val, ok := f.Value.(flag.Getter) + if !ok { + return "", fmt.Errorf("flag %s value does not support flag.Getter interface", name) + } + underlying := val.Get() + strptr, ok := underlying.(*string) + if !ok { + return "", fmt.Errorf("flag %s is not a string type, got %T", name, underlying) + } + return *strptr, nil +} + +// GetBoolFlagValue retrieves the current value (default or set) of a boolean flag +// by name from the specified FlagSet (or flag.CommandLine if nil). 
//
// The value is read through flag.Getter. The flag package's own bool flags
// return a plain bool from Get; a *bool is also accepted for custom Getter
// implementations that expose a pointer. An error is returned when the flag does
// not exist, does not implement flag.Getter, or does not hold a bool.
func GetBoolFlagValue(fs *flag.FlagSet, name string) (bool, error) {
	if fs == nil {
		fs = flag.CommandLine
	}

	f := fs.Lookup(name)
	if f == nil {
		return false, fmt.Errorf("flag not found: %s", name)
	}
	getter, ok := f.Value.(flag.Getter)
	if !ok {
		return false, fmt.Errorf("flag %s value does not support flag.Getter interface", name)
	}

	underlying := getter.Get()
	switch v := underlying.(type) {
	case bool:
		return v, nil
	case *bool:
		if v != nil {
			return *v, nil
		}
	}
	return false, fmt.Errorf("flag %s is not a bool type, got %T", name, underlying)
}