diff --git a/.env.example b/.env.example index 405c4b2..9f6bcc7 100644 --- a/.env.example +++ b/.env.example @@ -32,6 +32,16 @@ S3_PREFIX=snapshots/ # ─── HTTP Admin Service ─────────────────────────────────────────────────────── HTTP_PORT=8081 +# ─── Metrics ────────────────────────────────────────────────────────────────── +# Leave disabled for local development, or set to dogstatsd to emit metrics to +# a DogStatsD-compatible agent such as Datadog Agent. +METRICS_BACKEND=none +# DOGSTATSD_ADDR defaults to DD_AGENT_HOST:DD_DOGSTATSD_PORT, or 127.0.0.1:8125. +# DOGSTATSD_ADDR=127.0.0.1:8125 +# Optional comma-separated tags. service/env/version also come from +# DD_SERVICE/DD_ENV/DD_VERSION when set. +METRICS_TAGS=service:version-guard + # ─── Tag Configuration ──────────────────────────────────────────────────────── # Customize which AWS resource tags to use for extracting metadata # Comma-separated lists - first match wins diff --git a/README.md b/README.md index 5bf101f..e6aac1d 100644 --- a/README.md +++ b/README.md @@ -172,6 +172,23 @@ The `endoflife` service serves patched EOL data for products with pending upstre Once running, open the Temporal Web UI at http://localhost:8233 to trigger and monitor workflows. +### Metrics + +Metrics are disabled by default. To emit scan aggregates to a DogStatsD-compatible +agent such as Datadog Agent, set: + +```bash +METRICS_BACKEND=dogstatsd +DOGSTATSD_ADDR=127.0.0.1:8125 # optional +METRICS_TAGS=service:version-guard,team:platform +``` + +Version Guard emits `version_guard.findings.*`, +`version_guard.compliance_percentage`, `version_guard.detection.duration_ms`, +`version_guard.inventory.*`, and `version_guard.scan.completed`. Standard +Datadog tags are also read from `DD_SERVICE`, `DD_ENV`, and `DD_VERSION` when +present. + ### Run Locally (manual) If you prefer running components individually: diff --git a/USAGE.md b/USAGE.md index 04ae9ac..dcbaab6 100644 --- a/USAGE.md +++ b/USAGE.md @@ -348,13 +348,29 @@ temporal workflow observe --workflow-id --namespace version-guard- #### Metrics to Track -Version Guard emits the following metrics (if Datadog enabled): +Version Guard emits the following metrics when `METRICS_BACKEND=dogstatsd`: - `version_guard.findings.red` - Critical issues count - `version_guard.findings.yellow` - Warning issues count +- `version_guard.findings.green` - Compliant resources count +- `version_guard.findings.unknown` - Resources with unknown lifecycle data - `version_guard.findings.total` - Total resources scanned - `version_guard.compliance_percentage` - Fleet compliance % - `version_guard.detection.duration_ms` - Scan duration -- `version_guard.inventory.fetch` - Inventory fetch success rate +- `version_guard.inventory.fetch` - Inventory fetch success (1) or failure (0) +- `version_guard.inventory.resources` - Resources returned by a successful inventory fetch +- `version_guard.scan.completed` - Completed scans count + +DogStatsD emission is disabled by default for local and OSS users. Enable it +with: + +```bash +METRICS_BACKEND=dogstatsd +DOGSTATSD_ADDR=127.0.0.1:8125 # optional; defaults to DD_AGENT_HOST/DD_DOGSTATSD_PORT or 127.0.0.1:8125 +METRICS_TAGS=service:version-guard,team:platform +``` + +If `DD_SERVICE`, `DD_ENV`, or `DD_VERSION` are set, Version Guard adds those as +metric tags unless the same tag key is already present in `METRICS_TAGS`. #### Logs diff --git a/cmd/server/main.go b/cmd/server/main.go index 909457f..88b831b 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "log/slog" + "net" "net/http" "os" "os/signal" @@ -22,6 +23,7 @@ import ( "github.com/aws/aws-sdk-go-v2/service/s3" vgconfig "github.com/block/Version-Guard/pkg/config" + "github.com/block/Version-Guard/pkg/emitters" "github.com/block/Version-Guard/pkg/eol" eolendoflife "github.com/block/Version-Guard/pkg/eol/endoflife" "github.com/block/Version-Guard/pkg/inventory" @@ -70,6 +72,11 @@ type ServerCLI struct { // Service configuration HTTPPort int `help:"HTTP admin port (POST /scan)" default:"8081" env:"HTTP_PORT"` + // Metrics configuration + MetricsBackend string `help:"Metrics backend: none or dogstatsd" default:"none" env:"METRICS_BACKEND"` + DogStatsDAddr string `help:"DogStatsD UDP address (defaults to DD_AGENT_HOST:DD_DOGSTATSD_PORT or 127.0.0.1:8125)" env:"DOGSTATSD_ADDR"` + MetricsTags string `help:"Comma-separated metric tags, e.g. service:version-guard,team:platform" env:"METRICS_TAGS"` + // Tag configuration (comma-separated lists for AWS resource tags) TagAppKeys string `help:"Comma-separated tag keys for application/service name" default:"app,application,service" env:"TAG_APP_KEYS"` @@ -93,6 +100,10 @@ type ServerCLI struct { // parseTagKeys parses a comma-separated string into a slice of tag keys func parseTagKeys(input string) []string { + return parseCSV(input) +} + +func parseCSV(input string) []string { if input == "" { return []string{} } @@ -107,6 +118,58 @@ func parseTagKeys(input string) []string { return result } +func parseMetricTags(input string) []string { + tags := parseCSV(input) + result := make([]string, 0, len(tags)) + for _, tag := range tags { + if strings.Contains(tag, ":") { + result = append(result, tag) + } + } + return result +} + +func metricTagKeyExists(tags []string, key string) bool { + prefix := key + ":" + for _, tag := range tags { + if strings.HasPrefix(tag, prefix) { + return true + } + } + return false +} + +func appendMetricTagIfMissing(tags []string, key, value string) []string { + if value == "" || metricTagKeyExists(tags, key) { + return tags + } + return append(tags, key+":"+value) +} + +func buildMetricTags(input string) []string { + tags := parseMetricTags(input) + tags = appendMetricTagIfMissing(tags, "service", os.Getenv("DD_SERVICE")) + tags = appendMetricTagIfMissing(tags, "service", "version-guard") + tags = appendMetricTagIfMissing(tags, "env", os.Getenv("DD_ENV")) + tags = appendMetricTagIfMissing(tags, "version", os.Getenv("DD_VERSION")) + return tags +} + +func dogStatsDAddr(configured string) string { + if configured != "" { + return configured + } + host := os.Getenv("DD_AGENT_HOST") + if host == "" { + host = "127.0.0.1" + } + port := os.Getenv("DD_DOGSTATSD_PORT") + if port == "" { + port = "8125" + } + return net.JoinHostPort(host, port) +} + // buildTagConfig creates a TagConfig from the environment variables func (s *ServerCLI) buildTagConfig() *wiz.TagConfig { return &wiz.TagConfig{ @@ -114,6 +177,22 @@ func (s *ServerCLI) buildTagConfig() *wiz.TagConfig { } } +func (s *ServerCLI) buildMetricsEmitter() (emitters.MetricsEmitter, func() error, error) { + switch strings.ToLower(strings.TrimSpace(s.MetricsBackend)) { + case "", "none", "noop": + return emitters.NoopMetricsEmitter{}, nil, nil + case "dogstatsd", "datadog": + addr := dogStatsDAddr(s.DogStatsDAddr) + metricsEmitter, err := emitters.NewDogStatsDMetricsEmitter(addr, buildMetricTags(s.MetricsTags)) + if err != nil { + return nil, nil, err + } + return metricsEmitter, metricsEmitter.Close, nil + default: + return nil, nil, fmt.Errorf("unsupported metrics backend %q", s.MetricsBackend) + } +} + //nolint:gocognit,gocyclo // startup wires many optional components; splitting further would fragment a linear init sequence func (s *ServerCLI) Run(_ *kong.Context) error { // Initialize structured logger @@ -138,6 +217,7 @@ func (s *ServerCLI) Run(_ *kong.Context) error { fmt.Printf(" Wiz Cache TTL: %d hours\n", s.WizCacheTTLHours) fmt.Printf(" AWS Region: %s\n", s.AWSRegion) fmt.Printf(" S3 Prefix: %s\n", s.S3Prefix) + fmt.Printf(" Metrics Backend: %s\n", s.MetricsBackend) fmt.Printf(" Tag Keys - App: %s\n", s.TagAppKeys) if s.ScheduleEnabled { fmt.Printf(" Schedule: enabled (cron: %s, id: %s, jitter: %s)\n", @@ -156,6 +236,21 @@ func (s *ServerCLI) Run(_ *kong.Context) error { st := memory.NewStore() fmt.Println("✓ In-memory store initialized") + metricsEmitter, closeMetricsEmitter, err := s.buildMetricsEmitter() + if err != nil { + return fmt.Errorf("failed to configure metrics: %w", err) + } + if closeMetricsEmitter != nil { + defer func() { + if closeErr := closeMetricsEmitter(); closeErr != nil { + fmt.Printf("metrics emitter shutdown error: %v\n", closeErr) + } + }() + fmt.Printf("✓ Metrics configured (backend: %s, address: %s)\n", s.MetricsBackend, dogStatsDAddr(s.DogStatsDAddr)) + } else { + fmt.Println("✓ Metrics disabled") + } + // Initialize S3 snapshot store var snapshotStore *snapshot.S3Store ctx := context.Background() @@ -357,7 +452,7 @@ func (s *ServerCLI) Run(_ *kong.Context) error { eolProviders, policyEngine, st, - ) + ).WithMetricsEmitter(metricsEmitter) w.RegisterActivityWithOptions(detectionActivities.FetchInventory, activity.RegisterOptions{Name: detection.FetchInventoryActivityName}) w.RegisterActivityWithOptions(detectionActivities.FetchEOLData, activity.RegisterOptions{Name: detection.FetchEOLDataActivityName}) w.RegisterActivityWithOptions(detectionActivities.DetectDrift, activity.RegisterOptions{Name: detection.DetectDriftActivityName}) diff --git a/cmd/server/main_test.go b/cmd/server/main_test.go new file mode 100644 index 0000000..32b93d4 --- /dev/null +++ b/cmd/server/main_test.go @@ -0,0 +1,54 @@ +package main + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestBuildMetricTagsDefaultsService(t *testing.T) { + t.Setenv("DD_SERVICE", "") + t.Setenv("DD_ENV", "") + t.Setenv("DD_VERSION", "") + + assert.Equal(t, []string{"service:version-guard"}, buildMetricTags("")) +} + +func TestBuildMetricTagsUsesDatadogEnvVars(t *testing.T) { + t.Setenv("DD_SERVICE", "custom-service") + t.Setenv("DD_ENV", "staging") + t.Setenv("DD_VERSION", "1.2.3") + + assert.Equal(t, []string{ + "team:platform", + "service:custom-service", + "env:staging", + "version:1.2.3", + }, buildMetricTags("team:platform")) +} + +func TestBuildMetricTagsDoesNotOverrideExplicitTags(t *testing.T) { + t.Setenv("DD_SERVICE", "env-service") + t.Setenv("DD_ENV", "prod") + t.Setenv("DD_VERSION", "1.2.3") + + assert.Equal(t, []string{ + "service:explicit-service", + "env:explicit-env", + "version:explicit-version", + }, buildMetricTags("service:explicit-service,env:explicit-env,version:explicit-version")) +} + +func TestDogStatsDAddrDefaults(t *testing.T) { + t.Setenv("DD_AGENT_HOST", "") + t.Setenv("DD_DOGSTATSD_PORT", "") + + assert.Equal(t, "127.0.0.1:8125", dogStatsDAddr("")) +} + +func TestDogStatsDAddrUsesDatadogEnvVars(t *testing.T) { + t.Setenv("DD_AGENT_HOST", "datadog-agent") + t.Setenv("DD_DOGSTATSD_PORT", "8126") + + assert.Equal(t, "datadog-agent:8126", dogStatsDAddr("")) +} diff --git a/go.mod b/go.mod index f804410..a5085bc 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/block/Version-Guard go 1.24.2 require ( + github.com/DataDog/datadog-go/v5 v5.8.3 github.com/alecthomas/kong v1.15.0 github.com/aws/aws-sdk-go-v2 v1.41.5 github.com/aws/aws-sdk-go-v2/config v1.32.14 @@ -17,6 +18,7 @@ require ( ) require ( + github.com/Microsoft/go-winio v0.6.2 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.19.14 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 // indirect @@ -33,21 +35,21 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 // indirect github.com/aws/smithy-go v1.24.2 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/mock v1.6.0 // indirect + github.com/golang/mock v1.7.0-rc.1 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect github.com/nexus-rpc/sdk-go v0.6.0 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/robfig/cron v1.2.0 // indirect github.com/stretchr/objx v0.5.2 // indirect go.temporal.io/api v1.62.7 // indirect golang.org/x/net v0.49.0 // indirect golang.org/x/sys v0.40.0 // indirect golang.org/x/text v0.33.0 // indirect - golang.org/x/time v0.3.0 // indirect + golang.org/x/time v0.12.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260120221211-b8f7ae30c516 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260120221211-b8f7ae30c516 // indirect google.golang.org/protobuf v1.36.11 // indirect diff --git a/go.sum b/go.sum index 2b13f38..95da07e 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,8 @@ +github.com/DataDog/datadog-go/v5 v5.8.3 h1:s58CUJ9s8lezjhTNJO/SxkPBv2qZjS3ktpRSqGF5n0s= +github.com/DataDog/datadog-go/v5 v5.8.3/go.mod h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= +github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= github.com/alecthomas/kong v1.15.0 h1:BVJstKbpO73zKpmIu+m/aLRrNmWwxXPIGTNin9VmLVI= @@ -44,8 +49,10 @@ github.com/aws/smithy-go v1.24.2 h1:FzA3bu/nt/vDvmnkg+R8Xl46gmzEDam6mZ1hzmwXFng= github.com/aws/smithy-go v1.24.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a h1:yDWHCSQ40h88yih2JAcL6Ls/kVkSE8GFACTGVnMPruw= github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a/go.mod h1:7Ga40egUymuWXxAe151lTNnCv97MddSOVsjpPPkityA= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= @@ -54,8 +61,9 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= +github.com/golang/mock v1.7.0-rc.1 h1:YojYx61/OLFsiv6Rw1Z96LpldJIy31o+UHmwAUMJ6/U= +github.com/golang/mock v1.7.0-rc.1/go.mod h1:s42URUywIqd+OcERslBJvOjepvNymP31m3q8d/GkuRs= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= @@ -78,19 +86,29 @@ github.com/nexus-rpc/sdk-go v0.6.0 h1:QRgnP2zTbxEbiyWG/aXH8uSC5LV/Mg1fqb19jb4DBl github.com/nexus-rpc/sdk-go v0.6.0/go.mod h1:FHdPfVQwRuJFZFTF0Y2GOAxCrbIBNrcPna9slkGKPYk= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/robfig/cron v1.2.0 h1:ZjScXvvxeQ63Dbyxy76Fj3AT3Ut0aKsyd2/tl3DTMuQ= github.com/robfig/cron v1.2.0/go.mod h1:JGuDeoQd7Z6yL4zQhZ3OPEVHB7fL6Ka6skscFHfmt2k= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= @@ -113,11 +131,13 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -128,24 +148,31 @@ golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= -golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.8/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -163,5 +190,6 @@ google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/emitters/metrics.go b/pkg/emitters/metrics.go new file mode 100644 index 0000000..3afc055 --- /dev/null +++ b/pkg/emitters/metrics.go @@ -0,0 +1,175 @@ +package emitters + +import ( + "context" + "errors" + "fmt" + "strings" + + "github.com/DataDog/datadog-go/v5/statsd" + + "github.com/block/Version-Guard/pkg/types" +) + +const ( + MetricFindingsTotal = "version_guard.findings.total" + MetricFindingsRed = "version_guard.findings.red" + MetricFindingsYellow = "version_guard.findings.yellow" + MetricFindingsGreen = "version_guard.findings.green" + MetricFindingsUnknown = "version_guard.findings.unknown" + MetricCompliancePercentage = "version_guard.compliance_percentage" + MetricDetectionDurationMS = "version_guard.detection.duration_ms" + MetricInventoryFetch = "version_guard.inventory.fetch" + MetricInventoryResources = "version_guard.inventory.resources" + MetricScanCompleted = "version_guard.scan.completed" +) + +// ScanMetrics contains the aggregate scan values emitted to metrics backends. +type ScanMetrics struct { + Summary *types.ScanSummary + DurationMillis int64 +} + +// InventoryFetchMetrics contains the result of fetching inventory. +type InventoryFetchMetrics struct { + ResourceType types.ResourceType + ResourceCount int + Success bool +} + +// MetricsEmitter emits aggregate Version Guard scan metrics. +type MetricsEmitter interface { + EmitScanMetrics(ctx context.Context, metrics ScanMetrics) error + EmitInventoryFetchMetrics(ctx context.Context, metrics InventoryFetchMetrics) error +} + +// NoopMetricsEmitter is the default metrics emitter. +type NoopMetricsEmitter struct{} + +// EmitScanMetrics implements MetricsEmitter without emitting anything. +func (NoopMetricsEmitter) EmitScanMetrics(context.Context, ScanMetrics) error { + return nil +} + +// EmitInventoryFetchMetrics implements MetricsEmitter without emitting anything. +func (NoopMetricsEmitter) EmitInventoryFetchMetrics(context.Context, InventoryFetchMetrics) error { + return nil +} + +type statsdClient interface { + Count(name string, value int64, tags []string, rate float64) error + Gauge(name string, value float64, tags []string, rate float64) error + Close() error +} + +// DogStatsDMetricsEmitter emits metrics to a DogStatsD-compatible endpoint. +type DogStatsDMetricsEmitter struct { + client statsdClient + tags []string +} + +// NewDogStatsDMetricsEmitter creates a DogStatsD-backed metrics emitter. +func NewDogStatsDMetricsEmitter(addr string, tags []string) (*DogStatsDMetricsEmitter, error) { + if strings.TrimSpace(addr) == "" { + return nil, fmt.Errorf("dogstatsd address is required") + } + + client, err := statsd.New(addr) + if err != nil { + return nil, fmt.Errorf("create dogstatsd client: %w", err) + } + + return newDogStatsDMetricsEmitter(client, tags), nil +} + +func newDogStatsDMetricsEmitter(client statsdClient, tags []string) *DogStatsDMetricsEmitter { + return &DogStatsDMetricsEmitter{ + client: client, + tags: append([]string(nil), tags...), + } +} + +// Close flushes and closes the underlying DogStatsD client. +func (e *DogStatsDMetricsEmitter) Close() error { + if e == nil || e.client == nil { + return nil + } + return e.client.Close() +} + +// EmitScanMetrics emits scan aggregate metrics as gauges and a completion count. +func (e *DogStatsDMetricsEmitter) EmitScanMetrics(ctx context.Context, metrics ScanMetrics) error { + if e == nil || e.client == nil || metrics.Summary == nil { + return nil + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + tags := e.metricTags(metrics.Summary.ResourceType, metrics.Summary.CloudProvider) + var errs []error + + record := func(err error) { + if err != nil { + errs = append(errs, err) + } + } + + record(e.client.Gauge(MetricFindingsTotal, float64(metrics.Summary.TotalResources), tags, 1)) + record(e.client.Gauge(MetricFindingsRed, float64(metrics.Summary.RedCount), tags, 1)) + record(e.client.Gauge(MetricFindingsYellow, float64(metrics.Summary.YellowCount), tags, 1)) + record(e.client.Gauge(MetricFindingsGreen, float64(metrics.Summary.GreenCount), tags, 1)) + record(e.client.Gauge(MetricFindingsUnknown, float64(metrics.Summary.UnknownCount), tags, 1)) + record(e.client.Gauge(MetricCompliancePercentage, metrics.Summary.CompliancePercentage, tags, 1)) + if metrics.DurationMillis > 0 { + record(e.client.Gauge(MetricDetectionDurationMS, float64(metrics.DurationMillis), tags, 1)) + } + record(e.client.Count(MetricScanCompleted, 1, tags, 1)) + + return errors.Join(errs...) +} + +// EmitInventoryFetchMetrics emits inventory fetch success and resource count. +func (e *DogStatsDMetricsEmitter) EmitInventoryFetchMetrics(ctx context.Context, metrics InventoryFetchMetrics) error { + if e == nil || e.client == nil { + return nil + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + successValue := 0.0 + if metrics.Success { + successValue = 1.0 + } + + tags := e.metricTags(metrics.ResourceType, "") + errs := []error{ + e.client.Gauge(MetricInventoryFetch, successValue, tags, 1), + } + if metrics.Success { + errs = append(errs, e.client.Gauge(MetricInventoryResources, float64(metrics.ResourceCount), tags, 1)) + } + + return errors.Join(errs...) +} + +func (e *DogStatsDMetricsEmitter) metricTags(resourceType types.ResourceType, cloudProvider types.CloudProvider) []string { + tags := append([]string(nil), e.tags...) + tags = appendMetricTag(tags, "resource_type", strings.ToLower(resourceType.String())) + tags = appendMetricTag(tags, "cloud_provider", strings.ToLower(cloudProvider.String())) + return tags +} + +func appendMetricTag(tags []string, key, value string) []string { + if key == "" || value == "" { + return tags + } + return append(tags, key+":"+value) +} diff --git a/pkg/emitters/metrics_test.go b/pkg/emitters/metrics_test.go new file mode 100644 index 0000000..4b17ff3 --- /dev/null +++ b/pkg/emitters/metrics_test.go @@ -0,0 +1,133 @@ +package emitters + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/block/Version-Guard/pkg/types" +) + +type metricCall struct { + name string + tags []string + value float64 +} + +type fakeStatsDClient struct { + err error + gauges []metricCall + counts []metricCall + closed bool +} + +func (f *fakeStatsDClient) Gauge(name string, value float64, tags []string, _ float64) error { + f.gauges = append(f.gauges, metricCall{name: name, value: value, tags: append([]string(nil), tags...)}) + return f.err +} + +func (f *fakeStatsDClient) Count(name string, value int64, tags []string, _ float64) error { + f.counts = append(f.counts, metricCall{name: name, value: float64(value), tags: append([]string(nil), tags...)}) + return f.err +} + +func (f *fakeStatsDClient) Close() error { + f.closed = true + return f.err +} + +func TestDogStatsDMetricsEmitter_EmitScanMetrics(t *testing.T) { + client := &fakeStatsDClient{} + emitter := newDogStatsDMetricsEmitter(client, []string{"service:version-guard", "env:test"}) + + err := emitter.EmitScanMetrics(context.Background(), ScanMetrics{ + Summary: &types.ScanSummary{ + TotalResources: 4, + RedCount: 1, + YellowCount: 1, + GreenCount: 1, + UnknownCount: 1, + CompliancePercentage: 25, + ResourceType: types.ResourceTypeAurora, + CloudProvider: types.CloudProviderAWS, + }, + DurationMillis: 1234, + }) + require.NoError(t, err) + + assert.Len(t, client.gauges, 7) + assert.Equal(t, MetricFindingsTotal, client.gauges[0].name) + assert.Equal(t, 4.0, client.gauges[0].value) + assert.Equal(t, MetricCompliancePercentage, client.gauges[5].name) + assert.Equal(t, 25.0, client.gauges[5].value) + assert.Equal(t, MetricDetectionDurationMS, client.gauges[6].name) + assert.Equal(t, 1234.0, client.gauges[6].value) + assert.Equal(t, []string{ + "service:version-guard", + "env:test", + "resource_type:aurora", + "cloud_provider:aws", + }, client.gauges[0].tags) + + require.Len(t, client.counts, 1) + assert.Equal(t, MetricScanCompleted, client.counts[0].name) + assert.Equal(t, 1.0, client.counts[0].value) +} + +func TestDogStatsDMetricsEmitter_EmitInventoryFetchMetricsSuccess(t *testing.T) { + client := &fakeStatsDClient{} + emitter := newDogStatsDMetricsEmitter(client, []string{"service:version-guard"}) + + err := emitter.EmitInventoryFetchMetrics(context.Background(), InventoryFetchMetrics{ + ResourceType: types.ResourceTypeEKS, + ResourceCount: 10, + Success: true, + }) + require.NoError(t, err) + + require.Len(t, client.gauges, 2) + assert.Equal(t, MetricInventoryFetch, client.gauges[0].name) + assert.Equal(t, 1.0, client.gauges[0].value) + assert.Equal(t, []string{"service:version-guard", "resource_type:eks"}, client.gauges[0].tags) + assert.Equal(t, MetricInventoryResources, client.gauges[1].name) + assert.Equal(t, 10.0, client.gauges[1].value) +} + +func TestDogStatsDMetricsEmitter_EmitInventoryFetchMetricsFailure(t *testing.T) { + client := &fakeStatsDClient{} + emitter := newDogStatsDMetricsEmitter(client, nil) + + err := emitter.EmitInventoryFetchMetrics(context.Background(), InventoryFetchMetrics{ + ResourceType: types.ResourceTypeEKS, + Success: false, + }) + require.NoError(t, err) + + require.Len(t, client.gauges, 1) + assert.Equal(t, MetricInventoryFetch, client.gauges[0].name) + assert.Equal(t, 0.0, client.gauges[0].value) +} + +func TestDogStatsDMetricsEmitter_EmitScanMetricsReturnsClientErrors(t *testing.T) { + clientErr := errors.New("dogstatsd unavailable") + client := &fakeStatsDClient{err: clientErr} + emitter := newDogStatsDMetricsEmitter(client, nil) + + err := emitter.EmitScanMetrics(context.Background(), ScanMetrics{ + Summary: &types.ScanSummary{TotalResources: 1}, + }) + + require.Error(t, err) + assert.ErrorIs(t, err, clientErr) +} + +func TestDogStatsDMetricsEmitter_Close(t *testing.T) { + client := &fakeStatsDClient{} + emitter := newDogStatsDMetricsEmitter(client, nil) + + require.NoError(t, emitter.Close()) + assert.True(t, client.closed) +} diff --git a/pkg/workflow/detection/activities.go b/pkg/workflow/detection/activities.go index 02edd96..9d16e75 100644 --- a/pkg/workflow/detection/activities.go +++ b/pkg/workflow/detection/activities.go @@ -7,6 +7,7 @@ import ( "go.temporal.io/sdk/activity" + "github.com/block/Version-Guard/pkg/emitters" "github.com/block/Version-Guard/pkg/eol" "github.com/block/Version-Guard/pkg/inventory" "github.com/block/Version-Guard/pkg/policy" @@ -69,6 +70,7 @@ type MetricsInput struct { FindingsBatchID string Findings []*types.Finding ResourceType types.ResourceType + DurationMillis int64 } type MetricsResult struct { @@ -88,6 +90,7 @@ type Activities struct { EOLProviders map[types.ResourceType]eol.Provider Policy policy.VersionPolicy Store store.Store + MetricsEmitter emitters.MetricsEmitter resourceCache sync.Map } @@ -106,9 +109,19 @@ func NewActivities( EOLProviders: eolProviders, Policy: policy, Store: store, + MetricsEmitter: emitters.NoopMetricsEmitter{}, } } +// WithMetricsEmitter configures the emitter used by EmitMetrics. +func (a *Activities) WithMetricsEmitter(metricsEmitter emitters.MetricsEmitter) *Activities { + if metricsEmitter == nil { + metricsEmitter = emitters.NoopMetricsEmitter{} + } + a.MetricsEmitter = metricsEmitter + return a +} + func (a *Activities) loadResources(batchID string, fallback []*types.Resource) ([]*types.Resource, error) { if batchID == "" { return fallback, nil @@ -146,15 +159,40 @@ func (a *Activities) FetchInventory(ctx context.Context, input FetchInventoryInp source, ok := a.InventorySources[input.ResourceType] if !ok { + if a.MetricsEmitter != nil { + if emitErr := a.MetricsEmitter.EmitInventoryFetchMetrics(ctx, emitters.InventoryFetchMetrics{ + ResourceType: input.ResourceType, + Success: false, + }); emitErr != nil { + logger.Warn("Failed to emit inventory fetch metrics", "error", emitErr) + } + } return nil, fmt.Errorf("no inventory source registered for resource type: %s", input.ResourceType) } resources, err := source.ListResources(ctx, input.ResourceType) if err != nil { + if a.MetricsEmitter != nil { + if emitErr := a.MetricsEmitter.EmitInventoryFetchMetrics(ctx, emitters.InventoryFetchMetrics{ + ResourceType: input.ResourceType, + Success: false, + }); emitErr != nil { + logger.Warn("Failed to emit inventory fetch metrics", "error", emitErr) + } + } return nil, err } logger.Info("Inventory fetched", "count", len(resources)) + if a.MetricsEmitter != nil { + if emitErr := a.MetricsEmitter.EmitInventoryFetchMetrics(ctx, emitters.InventoryFetchMetrics{ + ResourceType: input.ResourceType, + ResourceCount: len(resources), + Success: true, + }); emitErr != nil { + logger.Warn("Failed to emit inventory fetch metrics", "error", emitErr) + } + } if input.ScanID == "" { return &InventoryResult{Resources: resources}, nil @@ -308,6 +346,9 @@ func (a *Activities) EmitMetrics(ctx context.Context, input MetricsInput) (*Metr if err != nil { return nil, err } + if input.FindingsBatchID != "" { + defer a.resourceCache.Delete(input.FindingsBatchID) + } logger := activity.GetLogger(ctx) logger.Info("Emitting metrics", "findingsCount", len(findings)) @@ -315,9 +356,13 @@ func (a *Activities) EmitMetrics(ctx context.Context, input MetricsInput) (*Metr // Calculate summary summary := &types.ScanSummary{ TotalResources: len(findings), + ResourceType: input.ResourceType, } for _, f := range findings { + if summary.CloudProvider == "" && f.CloudProvider != "" { + summary.CloudProvider = f.CloudProvider + } switch f.Status { case types.StatusRed: summary.RedCount++ @@ -341,10 +386,14 @@ func (a *Activities) EmitMetrics(ctx context.Context, input MetricsInput) (*Metr "green", summary.GreenCount, "compliance", summary.CompliancePercentage) - // TODO: Emit to Datadog/metrics system - - if input.FindingsBatchID != "" { - a.resourceCache.Delete(input.FindingsBatchID) + if a.MetricsEmitter != nil { + err = a.MetricsEmitter.EmitScanMetrics(ctx, emitters.ScanMetrics{ + Summary: summary, + DurationMillis: input.DurationMillis, + }) + if err != nil { + return nil, err + } } return &MetricsResult{ diff --git a/pkg/workflow/detection/activities_test.go b/pkg/workflow/detection/activities_test.go index 67605c1..ec1fe8a 100644 --- a/pkg/workflow/detection/activities_test.go +++ b/pkg/workflow/detection/activities_test.go @@ -1,12 +1,15 @@ package detection import ( + "context" + "errors" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.temporal.io/sdk/testsuite" + "github.com/block/Version-Guard/pkg/emitters" "github.com/block/Version-Guard/pkg/eol" eolmock "github.com/block/Version-Guard/pkg/eol/mock" "github.com/block/Version-Guard/pkg/inventory" @@ -41,6 +44,26 @@ func newActivityEnv() *testsuite.TestActivityEnvironment { return testSuite.NewTestActivityEnvironment() } +type recordingMetricsEmitter struct { + err error + metrics emitters.ScanMetrics + inventoryFetchMetric emitters.InventoryFetchMetrics + calls int + inventoryFetchCalls int +} + +func (r *recordingMetricsEmitter) EmitScanMetrics(_ context.Context, metrics emitters.ScanMetrics) error { + r.calls++ + r.metrics = metrics + return r.err +} + +func (r *recordingMetricsEmitter) EmitInventoryFetchMetrics(_ context.Context, metrics emitters.InventoryFetchMetrics) error { + r.inventoryFetchCalls++ + r.inventoryFetchMetric = metrics + return r.err +} + func testResources() []*types.Resource { return []*types.Resource{ { @@ -119,8 +142,27 @@ func TestFetchInventory_WithoutScanID_ReturnsInline(t *testing.T) { assert.Len(t, inv.Resources, 2) } +func TestFetchInventory_EmitsSuccessMetrics(t *testing.T) { + resources := testResources() + emitter := &recordingMetricsEmitter{} + act := newTestActivities(resources, nil).WithMetricsEmitter(emitter) + env := newActivityEnv() + env.RegisterActivity(act.FetchInventory) + + _, err := env.ExecuteActivity(act.FetchInventory, FetchInventoryInput{ + ResourceType: types.ResourceTypeAurora, + }) + require.NoError(t, err) + + require.Equal(t, 1, emitter.inventoryFetchCalls) + assert.Equal(t, types.ResourceTypeAurora, emitter.inventoryFetchMetric.ResourceType) + assert.Equal(t, 2, emitter.inventoryFetchMetric.ResourceCount) + assert.True(t, emitter.inventoryFetchMetric.Success) +} + func TestFetchInventory_SourceError(t *testing.T) { errSource := &invmock.InventorySource{ListErr: assert.AnError} + emitter := &recordingMetricsEmitter{} act := NewActivities( map[types.ResourceType]inventory.InventorySource{ types.ResourceTypeAurora: errSource, @@ -130,7 +172,7 @@ func TestFetchInventory_SourceError(t *testing.T) { }, policy.NewDefaultPolicy(), memory.NewStore(), - ) + ).WithMetricsEmitter(emitter) env := newActivityEnv() env.RegisterActivity(act.FetchInventory) @@ -139,6 +181,9 @@ func TestFetchInventory_SourceError(t *testing.T) { ResourceType: types.ResourceTypeAurora, }) require.Error(t, err) + require.Equal(t, 1, emitter.inventoryFetchCalls) + assert.Equal(t, types.ResourceTypeAurora, emitter.inventoryFetchMetric.ResourceType) + assert.False(t, emitter.inventoryFetchMetric.Success) } // --- FetchEOLData tests --- @@ -467,6 +512,54 @@ func TestEmitMetrics_EmptyFindings(t *testing.T) { assert.Equal(t, 0.0, metrics.Summary.CompliancePercentage) } +func TestEmitMetrics_UsesConfiguredMetricsEmitter(t *testing.T) { + emitter := &recordingMetricsEmitter{} + act := newTestActivities(nil, nil).WithMetricsEmitter(emitter) + env := newActivityEnv() + env.RegisterActivity(act.EmitMetrics) + + result, err := env.ExecuteActivity(act.EmitMetrics, MetricsInput{ + Findings: []*types.Finding{ + {ResourceID: "r1", Status: types.StatusRed, CloudProvider: types.CloudProviderAWS}, + {ResourceID: "r2", Status: types.StatusGreen, CloudProvider: types.CloudProviderAWS}, + }, + ResourceType: types.ResourceTypeAurora, + DurationMillis: 1234, + }) + require.NoError(t, err) + + var metrics MetricsResult + require.NoError(t, result.Get(&metrics)) + require.Equal(t, 1, emitter.calls) + require.NotNil(t, emitter.metrics.Summary) + assert.Equal(t, types.ResourceTypeAurora, emitter.metrics.Summary.ResourceType) + assert.Equal(t, types.CloudProviderAWS, emitter.metrics.Summary.CloudProvider) + assert.Equal(t, 2, emitter.metrics.Summary.TotalResources) + assert.Equal(t, 50.0, emitter.metrics.Summary.CompliancePercentage) + assert.Equal(t, int64(1234), emitter.metrics.DurationMillis) +} + +func TestEmitMetrics_EmitterErrorCleansCache(t *testing.T) { + emitErr := errors.New("emit failed") + act := newTestActivities(nil, nil).WithMetricsEmitter(&recordingMetricsEmitter{err: emitErr}) + act.resourceCache.Store("scan-123-findings", []*types.Finding{ + {ResourceID: "r1", Status: types.StatusRed}, + }) + + env := newActivityEnv() + env.RegisterActivity(act.EmitMetrics) + + _, err := env.ExecuteActivity(act.EmitMetrics, MetricsInput{ + FindingsBatchID: "scan-123-findings", + ResourceType: types.ResourceTypeAurora, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), emitErr.Error()) + + _, ok := act.resourceCache.Load("scan-123-findings") + assert.False(t, ok, "findings cache should be deleted even when metrics emission fails") +} + // --- Full pipeline cache lifecycle test --- func TestCacheLifecycle_EndToEnd(t *testing.T) { diff --git a/pkg/workflow/detection/workflow.go b/pkg/workflow/detection/workflow.go index 65c84bd..5e9f8b4 100644 --- a/pkg/workflow/detection/workflow.go +++ b/pkg/workflow/detection/workflow.go @@ -149,6 +149,7 @@ func DetectionWorkflow(ctx workflow.Context, input WorkflowInput) (*WorkflowOutp FindingsBatchID: detectResult.FindingsBatchID, Findings: detectResult.Findings, ResourceType: input.ResourceType, + DurationMillis: workflow.Now(ctx).Sub(startTime).Milliseconds(), }).Get(shortCtx, &metricsResult) if err != nil { logger.Error("Failed to emit metrics", "error", err)