diff --git a/cmd/cert-checker/main.go b/cmd/cert-checker/main.go index 8590c5f0a36..4e4e880c9e3 100644 --- a/cmd/cert-checker/main.go +++ b/cmd/cert-checker/main.go @@ -4,13 +4,14 @@ import ( "bytes" "context" "crypto/x509" - "encoding/json" "flag" "fmt" + "net" "net/netip" "os" "regexp" "slices" + "strings" "sync" "sync/atomic" "time" @@ -38,6 +39,36 @@ import ( "github.com/letsencrypt/boulder/sa" ) +type certCheckerMetrics struct { + checkerLatency prometheus.Histogram + checkerTimestamp prometheus.Gauge + checkerGoodCount prometheus.Gauge + checkerBadCount prometheus.Gauge +} + +func NewCertCheckerMetrics(stats prometheus.Registerer) *certCheckerMetrics { + checkerLatency := promauto.With(stats).NewHistogram(prometheus.HistogramOpts{ + Name: "cert_checker_latency", + Help: "Histogram of latencies a cert-checker worker takes to complete a batch", + }) + + checkerTimestamp := promauto.With(stats).NewGauge(prometheus.GaugeOpts{ + Name: "cert_checker_last_run_timestamp", + Help: "Timestamp of cert-checker's last run", + }) + + checkerGoodCount := promauto.With(stats).NewGauge(prometheus.GaugeOpts{ + Name: "cert_checker_good_count", + Help: "Cert-checker count of good certificates", + }) + + checkerBadCount := promauto.With(stats).NewGauge(prometheus.GaugeOpts{ + Name: "cert_checker_bad_count", + Help: "Cert-checker count of bad certificates", + }) + return &certCheckerMetrics{checkerLatency, checkerTimestamp, checkerGoodCount, checkerBadCount} +} + // For defense-in-depth in addition to using the PA & its identPolicy to check // domain names we also perform a check against the regex's from the // forbiddenDomains array @@ -62,25 +93,9 @@ var batchSize = 1000 type report struct { begin time.Time end time.Time - GoodCerts int64 `json:"good-certs"` - BadCerts int64 `json:"bad-certs"` - DbErrs int64 `json:"db-errs"` - Entries map[string]reportEntry `json:"entries"` -} - -func (r *report) dump() error { - content, err := json.MarshalIndent(r, "", " ") - if err != nil { - return err - } - fmt.Fprintln(os.Stdout, string(content)) - return nil -} - -type reportEntry struct { - Valid bool `json:"valid"` - SANs []string `json:"sans"` - Problems []string `json:"problems,omitempty"` + GoodCerts int64 `json:"good-certs"` + BadCerts int64 `json:"bad-certs"` + DbErrs int64 `json:"db-errs"` } // certDB is an interface collecting the borp.DbMap functions that the various @@ -134,7 +149,6 @@ func newChecker(saDbMap certDB, certs: make(chan *corepb.Certificate, batchSize), rMu: new(sync.Mutex), clock: clk, - issuedReport: report{Entries: make(map[string]reportEntry)}, checkPeriod: period, acceptableValidityDurations: avd, lints: lints, @@ -265,26 +279,17 @@ func (c *certChecker) getCerts(ctx context.Context) error { return nil } -func (c *certChecker) processCerts(ctx context.Context, wg *sync.WaitGroup, badResultsOnly bool) { +func (c *certChecker) processCerts(ctx context.Context) { for cert := range c.certs { sans, problems := c.checkCert(ctx, cert) valid := len(problems) == 0 - c.rMu.Lock() - if !badResultsOnly || (badResultsOnly && !valid) { - c.issuedReport.Entries[cert.Serial] = reportEntry{ - Valid: valid, - SANs: sans, - Problems: problems, - } - } - c.rMu.Unlock() if !valid { atomic.AddInt64(&c.issuedReport.BadCerts, 1) + c.logger.AuditErr("certificate error found", nil, map[string]any{"serial": cert.Serial, "sans": sans, "problems": problems}) } else { atomic.AddInt64(&c.issuedReport.GoodCerts, 1) } } - wg.Done() } // Extensions that we allow in certificates @@ -540,8 +545,19 @@ type Config struct { cmd.HostnamePolicyConfig Workers int `validate:"required,min=1"` - // Deprecated: this is ignored, and cert checker always checks both expired and unexpired. - UnexpiredOnly bool + // LookupDNSAuthority can only be specified with PushgatewayService. It's a single + // : of the DNS server to be used for resolution + // of pushgateway backends. If the address contains a hostname it will be resolved + // using system DNS. If the address contains a port, the client will use it + // directly, otherwise port 53 is used. + LookupDNSAuthority string `validate:"excluded_without=PushgatewayService,required_with=PushgatewayService,omitempty,ip|hostname|hostname_port"` + // PushgatewayService entry contains a service and domain name that will be used + // to construct a SRV DNS query to lookup pushgateway backends. For example: if + // the resource record is 'foo.service.consul', then the 'Service' is 'foo' + // and the 'Domain' is 'service.consul'. The expected dNSName to be + // authenticated in the server certificate would be 'foo.service.consul'. + PushgatewayService *cmd.ServiceDomain `validate:"required_with=LookupDNSAuthority"` + // Deprecated: cert-checker only logs bad results anyway. BadResultsOnly bool CheckPeriod config.Duration @@ -577,6 +593,47 @@ type Config struct { Syslog cmd.SyslogConfig } +// getPushgatewayURL resolves svc via SRV+A lookups against dnsAuthority and +// returns an http:// URL whose host is an IP address. Both lookups go through +// dnsAuthority (typically Consul DNS) because the system resolver can't answer +// queries for the .consul domain. The SRV target is then flattened to an IP +// because the returned URL is consumed by net/http via cmd.PushMetrics, which +// resolves hostnames using the system resolver. Scheme is fixed to http: +// pushgateway is assumed to be on an internal network +func getPushgatewayURL(ctx context.Context, dnsAuthority string, svc cmd.ServiceDomain) (string, error) { + host, port, err := net.SplitHostPort(dnsAuthority) + if err != nil { + // Assume only hostname or IPv4 address was specified. + host = dnsAuthority + port = "53" + } + r := &net.Resolver{ + PreferGo: true, + Dial: func(ctx context.Context, network, _ string) (net.Conn, error) { + return (&net.Dialer{}).DialContext(ctx, network, net.JoinHostPort(host, port)) + }, + } + _, targets, err := r.LookupSRV(ctx, svc.Service, "tcp", svc.Domain) + if err != nil { + return "", fmt.Errorf("SRV lookup of _%s._tcp.%s failed: %w", svc.Service, svc.Domain, err) + } + if len(targets) == 0 { + return "", fmt.Errorf("SRV lookup of _%s._tcp.%s returned 0 results", svc.Service, svc.Domain) + } + // Flatten the SRV target to an IP using the same Consul authority; net/http + // (used downstream) would otherwise try to resolve names like + // *.addr.dc1.consul via the system resolver and fail. + target := strings.TrimSuffix(targets[0].Target, ".") + addrs, err := r.LookupHost(ctx, target) + if err != nil { + return "", fmt.Errorf("A/AAAA lookup of %q failed: %w", target, err) + } + if len(addrs) == 0 { + return "", fmt.Errorf("A/AAAA lookup of %q returned 0 results", target) + } + return fmt.Sprintf("http://%s", net.JoinHostPort(addrs[0], fmt.Sprint(targets[0].Port))), nil +} + func main() { configFile := flag.String("config", "", "File path to the configuration file for this service") flag.Parse() @@ -594,6 +651,9 @@ func main() { logger := cmd.NewLogger(config.Syslog) cmd.LogStartup(logger) + reg := prometheus.NewRegistry() + metrics := NewCertCheckerMetrics(reg) + acceptableValidityDurations := make(map[time.Duration]bool) if len(config.CertChecker.AcceptableValidityDurations) > 0 { for _, entry := range config.CertChecker.AcceptableValidityDurations { @@ -616,11 +676,6 @@ func main() { saDbMap, err := sa.InitWrappedDb(config.CertChecker.DB, prometheus.DefaultRegisterer, logger) cmd.FailOnError(err, "While initializing dbMap") - checkerLatency := promauto.NewHistogram(prometheus.HistogramOpts{ - Name: "cert_checker_latency", - Help: "Histogram of latencies a cert-checker worker takes to complete a batch", - }) - pa, err := policy.New(config.PA.Identifiers, config.PA.Challenges, logger) cmd.FailOnError(err, "Failed to create PA") @@ -663,23 +718,34 @@ func main() { fmt.Fprintf(os.Stderr, "# Processing certificates using %d workers\n", config.CertChecker.Workers) wg := new(sync.WaitGroup) for range config.CertChecker.Workers { - wg.Add(1) - go func() { + wg.Go(func() { s := checker.clock.Now() - checker.processCerts(context.TODO(), wg, config.CertChecker.BadResultsOnly) - checkerLatency.Observe(checker.clock.Since(s).Seconds()) - }() + checker.processCerts(context.TODO()) + metrics.checkerLatency.Observe(checker.clock.Since(s).Seconds()) + }) } wg.Wait() - fmt.Fprintf( - os.Stderr, - "# Finished processing certificates, report length: %d, good: %d, bad: %d\n", - len(checker.issuedReport.Entries), - checker.issuedReport.GoodCerts, - checker.issuedReport.BadCerts, - ) - err = checker.issuedReport.dump() - cmd.FailOnError(err, "Failed to dump results: %s\n") + logger.AuditInfo("Finished processing certificates", checker.issuedReport) + + metrics.checkerTimestamp.SetToCurrentTime() + metrics.checkerGoodCount.Set(float64(checker.issuedReport.GoodCerts)) + metrics.checkerBadCount.Set(float64(checker.issuedReport.BadCerts)) + + if config.CertChecker.PushgatewayService != nil { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + pushgatewayURL, err := getPushgatewayURL(ctx, config.CertChecker.LookupDNSAuthority, *config.CertChecker.PushgatewayService) + if err != nil { + logger.Errf("failed to get pushgateway URL: %s", err) + } else { + err = cmd.PushMetrics("cert-checker", pushgatewayURL, reg, logger) + if err != nil { + logger.Errf("failed to push metrics to pushgateway: %s", err) + } else { + logger.Debugf("pushed metrics to pushgateway at %s", pushgatewayURL) + } + } + } if checker.issuedReport.BadCerts > 0 { os.Exit(1) diff --git a/cmd/cert-checker/main_test.go b/cmd/cert-checker/main_test.go index 9ffcf1beafa..4c6f08141db 100644 --- a/cmd/cert-checker/main_test.go +++ b/cmd/cert-checker/main_test.go @@ -14,16 +14,19 @@ import ( "log" "math/big" mrand "math/rand/v2" + "net" + "net/url" "os" "slices" + "strconv" "strings" - "sync" "testing" "time" "github.com/jmhodges/clock" "google.golang.org/protobuf/types/known/timestamppb" + "github.com/letsencrypt/boulder/cmd" "github.com/letsencrypt/boulder/core" corepb "github.com/letsencrypt/boulder/core/proto" "github.com/letsencrypt/boulder/ctpolicy/loglist" @@ -336,7 +339,8 @@ func TestGetAndProcessCerts(t *testing.T) { fc := clock.NewFake() fc.Set(fc.Now().Add(time.Hour)) - checker := newChecker(saDbMap, fc, pa, kp, time.Hour, testValidityDurations, nil, blog.NewMock()) + mocklog := blog.NewMock() + checker := newChecker(saDbMap, fc, pa, kp, time.Hour, testValidityDurations, nil, mocklog) sa, err := sa.NewSQLStorageAuthority(saDbMap, saDbMap, nil, 0, fc, blog.NewMock(), metrics.NoopRegisterer) test.AssertNotError(t, err, "Couldn't create SA to insert certificates") saCleanUp := test.ResetBoulderTestDatabase(t) @@ -375,11 +379,9 @@ func TestGetAndProcessCerts(t *testing.T) { err = checker.getCerts(context.Background()) test.AssertNotError(t, err, "Failed to retrieve certificates") test.AssertEquals(t, len(checker.certs), 5) - wg := new(sync.WaitGroup) - wg.Add(1) - checker.processCerts(context.Background(), wg, false) + checker.processCerts(context.Background()) test.AssertEquals(t, checker.issuedReport.BadCerts, int64(5)) - test.AssertEquals(t, len(checker.issuedReport.Entries), 5) + test.AssertEquals(t, len(mocklog.GetAllMatching("certificate error found")), 5) } // mismatchedCountDB is a certDB implementation for `getCerts` that returns one @@ -507,30 +509,6 @@ func TestGetCertsLate(t *testing.T) { } } -func TestSaveReport(t *testing.T) { - r := report{ - begin: time.Time{}, - end: time.Time{}, - GoodCerts: 2, - BadCerts: 1, - Entries: map[string]reportEntry{ - "020000000000004b475da49b91da5c17": { - Valid: true, - }, - "020000000000004d1613e581432cba7e": { - Valid: true, - }, - "020000000000004e402bc21035c6634a": { - Valid: false, - Problems: []string{"None really..."}, - }, - }, - } - - err := r.dump() - test.AssertNotError(t, err, "Failed to dump results") -} - func TestIsForbiddenDomain(t *testing.T) { // Note: These testcases are not an exhaustive representation of domains // Boulder won't issue for, but are instead testing the defense-in-depth @@ -698,3 +676,38 @@ func TestPrecertCorrespond(t *testing.T) { } t.Fatalf("expected precert correspondence problem, but got: %v", problems) } + +func TestGetPushgatewayURL(t *testing.T) { + ctx := context.Background() + t.Run("happy path", func(t *testing.T) { + gotURL, err := getPushgatewayURL(ctx, "consul.service.consul:53", + cmd.ServiceDomain{Service: "redisratelimits", Domain: "service.consul"}) + test.AssertNotError(t, err, "") + + parsed, err := url.Parse(gotURL) + test.AssertNotError(t, err, "returned URL should be parseable") + test.AssertEquals(t, parsed.Scheme, "http") + + host, port, err := net.SplitHostPort(parsed.Host) + test.AssertNotError(t, err, "URL host should contain a port") + test.AssertNotNil(t, net.ParseIP(host), "host should be an IP (LookupHost flatten step)") + portNum, err := strconv.Atoi(port) + test.AssertNotError(t, err, "port should be numeric") + test.Assert(t, portNum > 0 && portNum < 65536, "port should be in valid range") + }) + t.Run("DNS authority no port specified", func(t *testing.T) { + _, err := getPushgatewayURL(ctx, "consul.service.consul", + cmd.ServiceDomain{Service: "redisratelimits", Domain: "service.consul"}) + test.AssertNotError(t, err, "") + }) + t.Run("SRV not found", func(t *testing.T) { + _, err := getPushgatewayURL(ctx, "consul.service.consul:53", + cmd.ServiceDomain{Service: "doesnotexist", Domain: "service.consul"}) + test.AssertError(t, err, "") + }) + t.Run("DNS authority unreachable", func(t *testing.T) { + _, err := getPushgatewayURL(ctx, "doesnotexist.invalid:53", + cmd.ServiceDomain{Service: "redisratelimits", Domain: "service.consul"}) + test.AssertError(t, err, "") + }) +} diff --git a/cmd/shell.go b/cmd/shell.go index 2b3509f5aaa..42d0f474f8d 100644 --- a/cmd/shell.go +++ b/cmd/shell.go @@ -26,6 +26,7 @@ import ( "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/prometheus/client_golang/prometheus/push" "github.com/redis/go-redis/v9" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" @@ -575,3 +576,15 @@ func WaitForSignal() { signal.Notify(sigChan, syscall.SIGHUP) <-sigChan } + +func PushMetrics(jobname, pushgatewayURL string, gatherer prometheus.Gatherer, logger blog.Logger) error { + hostname, err := os.Hostname() + if err != nil { + logger.Warningf("error getting hostname: %s", err) + hostname = "unknown" + } + return push.New(pushgatewayURL, jobname). + Gatherer(gatherer). + Grouping("instance", hostname). + Push() +} diff --git a/test/config-next/cert-checker.json b/test/config-next/cert-checker.json index 1afa679c7ef..8dccca4e23a 100644 --- a/test/config-next/cert-checker.json +++ b/test/config-next/cert-checker.json @@ -6,7 +6,6 @@ }, "hostnamePolicyFile": "test/ident-policy.yaml", "workers": 16, - "unexpiredOnly": true, "badResultsOnly": true, "checkPeriod": "72h", "acceptableValidityDurations": [ diff --git a/test/config/cert-checker.json b/test/config/cert-checker.json index 5ca23dc550b..4f51507f249 100644 --- a/test/config/cert-checker.json +++ b/test/config/cert-checker.json @@ -6,7 +6,6 @@ }, "hostnamePolicyFile": "test/ident-policy.yaml", "workers": 16, - "unexpiredOnly": true, "badResultsOnly": true, "checkPeriod": "72h", "acceptableValidityDurations": [ diff --git a/vendor/github.com/prometheus/client_golang/prometheus/push/push.go b/vendor/github.com/prometheus/client_golang/prometheus/push/push.go new file mode 100644 index 00000000000..e524aa1303e --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/push/push.go @@ -0,0 +1,356 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package push provides functions to push metrics to a Pushgateway. It uses a +// builder approach. Create a Pusher with New and then add the various options +// by using its methods, finally calling Add or Push, like this: +// +// // Easy case: +// push.New("http://example.org/metrics", "my_job").Gatherer(myRegistry).Push() +// +// // Complex case: +// push.New("http://example.org/metrics", "my_job"). +// Collector(myCollector1). +// Collector(myCollector2). +// Grouping("zone", "xy"). +// Client(&myHTTPClient). +// BasicAuth("top", "secret"). +// Add() +// +// See the examples section for more detailed examples. +// +// See the documentation of the Pushgateway to understand the meaning of +// the grouping key and the differences between Push and Add: +// https://github.com/prometheus/pushgateway +package push + +import ( + "bytes" + "context" + "encoding/base64" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + + "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + contentTypeHeader = "Content-Type" + // base64Suffix is appended to a label name in the request URL path to + // mark the following label value as base64 encoded. + base64Suffix = "@base64" +) + +var errJobEmpty = errors.New("job name is empty") + +// HTTPDoer is an interface for the one method of http.Client that is used by Pusher +type HTTPDoer interface { + Do(*http.Request) (*http.Response, error) +} + +// Pusher manages a push to the Pushgateway. Use New to create one, configure it +// with its methods, and finally use the Add or Push method to push. +type Pusher struct { + error error + + url, job string + grouping map[string]string + + gatherers prometheus.Gatherers + registerer prometheus.Registerer + + client HTTPDoer + header http.Header + useBasicAuth bool + username, password string + + expfmt expfmt.Format +} + +// New creates a new Pusher to push to the provided URL with the provided job +// name (which must not be empty). You can use just host:port or ip:port as url, +// in which case “http://” is added automatically. Alternatively, include the +// schema in the URL. However, do not include the “/metrics/jobs/…” part. +func New(url, job string) *Pusher { + var ( + reg = prometheus.NewRegistry() + err error + ) + if job == "" { + err = errJobEmpty + } + if !strings.Contains(url, "://") { + url = "http://" + url + } + url = strings.TrimSuffix(url, "/") + + return &Pusher{ + error: err, + url: url, + job: job, + grouping: map[string]string{}, + gatherers: prometheus.Gatherers{reg}, + registerer: reg, + client: &http.Client{}, + expfmt: expfmt.NewFormat(expfmt.TypeProtoDelim), + } +} + +// Push collects/gathers all metrics from all Collectors and Gatherers added to +// this Pusher. Then, it pushes them to the Pushgateway configured while +// creating this Pusher, using the configured job name and any added grouping +// labels as grouping key. All previously pushed metrics with the same job and +// other grouping labels will be replaced with the metrics pushed by this +// call. (It uses HTTP method “PUT” to push to the Pushgateway.) +// +// Push returns the first error encountered by any method call (including this +// one) in the lifetime of the Pusher. +func (p *Pusher) Push() error { + return p.push(context.Background(), http.MethodPut) +} + +// PushContext is like Push but includes a context. +// +// If the context expires before HTTP request is complete, an error is returned. +func (p *Pusher) PushContext(ctx context.Context) error { + return p.push(ctx, http.MethodPut) +} + +// Add works like push, but only previously pushed metrics with the same name +// (and the same job and other grouping labels) will be replaced. (It uses HTTP +// method “POST” to push to the Pushgateway.) +func (p *Pusher) Add() error { + return p.push(context.Background(), http.MethodPost) +} + +// AddContext is like Add but includes a context. +// +// If the context expires before HTTP request is complete, an error is returned. +func (p *Pusher) AddContext(ctx context.Context) error { + return p.push(ctx, http.MethodPost) +} + +// Gatherer adds a Gatherer to the Pusher, from which metrics will be gathered +// to push them to the Pushgateway. The gathered metrics must not contain a job +// label of their own. +// +// For convenience, this method returns a pointer to the Pusher itself. +func (p *Pusher) Gatherer(g prometheus.Gatherer) *Pusher { + p.gatherers = append(p.gatherers, g) + return p +} + +// Collector adds a Collector to the Pusher, from which metrics will be +// collected to push them to the Pushgateway. The collected metrics must not +// contain a job label of their own. +// +// For convenience, this method returns a pointer to the Pusher itself. +func (p *Pusher) Collector(c prometheus.Collector) *Pusher { + if p.error == nil { + p.error = p.registerer.Register(c) + } + return p +} + +// Error returns the error that was encountered. +func (p *Pusher) Error() error { + return p.error +} + +// Grouping adds a label pair to the grouping key of the Pusher, replacing any +// previously added label pair with the same label name. Note that setting any +// labels in the grouping key that are already contained in the metrics to push +// will lead to an error. +// +// For convenience, this method returns a pointer to the Pusher itself. +func (p *Pusher) Grouping(name, value string) *Pusher { + if p.error == nil { + if !model.LabelName(name).IsValid() { + p.error = fmt.Errorf("grouping label has invalid name: %s", name) + return p + } + p.grouping[name] = value + } + return p +} + +// Client sets a custom HTTP client for the Pusher. For convenience, this method +// returns a pointer to the Pusher itself. +// Pusher only needs one method of the custom HTTP client: Do(*http.Request). +// Thus, rather than requiring a fully fledged http.Client, +// the provided client only needs to implement the HTTPDoer interface. +// Since *http.Client naturally implements that interface, it can still be used normally. +func (p *Pusher) Client(c HTTPDoer) *Pusher { + p.client = c + return p +} + +// Header sets a custom HTTP header for the Pusher's client. For convenience, this method +// returns a pointer to the Pusher itself. +func (p *Pusher) Header(header http.Header) *Pusher { + p.header = header + return p +} + +// BasicAuth configures the Pusher to use HTTP Basic Authentication with the +// provided username and password. For convenience, this method returns a +// pointer to the Pusher itself. +func (p *Pusher) BasicAuth(username, password string) *Pusher { + p.useBasicAuth = true + p.username = username + p.password = password + return p +} + +// Format configures the Pusher to use an encoding format given by the +// provided expfmt.Format. The default format is expfmt.FmtProtoDelim and +// should be used with the standard Prometheus Pushgateway. Custom +// implementations may require different formats. For convenience, this +// method returns a pointer to the Pusher itself. +func (p *Pusher) Format(format expfmt.Format) *Pusher { + p.expfmt = format + return p +} + +// Delete sends a “DELETE” request to the Pushgateway configured while creating +// this Pusher, using the configured job name and any added grouping labels as +// grouping key. Any added Gatherers and Collectors added to this Pusher are +// ignored by this method. +// +// Delete returns the first error encountered by any method call (including this +// one) in the lifetime of the Pusher. +func (p *Pusher) Delete() error { + if p.error != nil { + return p.error + } + req, err := http.NewRequest(http.MethodDelete, p.fullURL(), nil) + if err != nil { + return err + } + if p.header != nil { + req.Header = p.header + } + if p.useBasicAuth { + req.SetBasicAuth(p.username, p.password) + } + resp, err := p.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusAccepted { + body, _ := io.ReadAll(resp.Body) // Ignore any further error as this is for an error message only. + return fmt.Errorf("unexpected status code %d while deleting %s: %s", resp.StatusCode, p.fullURL(), body) + } + return nil +} + +func (p *Pusher) push(ctx context.Context, method string) error { + if p.error != nil { + return p.error + } + mfs, err := p.gatherers.Gather() + if err != nil { + return err + } + buf := &bytes.Buffer{} + enc := expfmt.NewEncoder(buf, p.expfmt) + // Check for pre-existing grouping labels: + for _, mf := range mfs { + for _, m := range mf.GetMetric() { + for _, l := range m.GetLabel() { + if l.GetName() == "job" { + return fmt.Errorf("pushed metric %s (%s) already contains a job label", mf.GetName(), m) + } + if _, ok := p.grouping[l.GetName()]; ok { + return fmt.Errorf( + "pushed metric %s (%s) already contains grouping label %s", + mf.GetName(), m, l.GetName(), + ) + } + } + } + if err := enc.Encode(mf); err != nil { + return fmt.Errorf( + "failed to encode metric family %s, error is %w", + mf.GetName(), err) + } + } + req, err := http.NewRequestWithContext(ctx, method, p.fullURL(), buf) + if err != nil { + return err + } + if p.header != nil { + req.Header = p.header + } + if p.useBasicAuth { + req.SetBasicAuth(p.username, p.password) + } + req.Header.Set(contentTypeHeader, string(p.expfmt)) + resp, err := p.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + // Depending on version and configuration of the PGW, StatusOK or StatusAccepted may be returned. + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted { + body, _ := io.ReadAll(resp.Body) // Ignore any further error as this is for an error message only. + return fmt.Errorf("unexpected status code %d while pushing to %s: %s", resp.StatusCode, p.fullURL(), body) + } + return nil +} + +// fullURL assembles the URL used to push/delete metrics and returns it as a +// string. The job name and any grouping label values containing a '/' will +// trigger a base64 encoding of the affected component and proper suffixing of +// the preceding component. Similarly, an empty grouping label value will be +// encoded as base64 just with a single `=` padding character (to avoid an empty +// path component). If the component does not contain a '/' but other special +// characters, the usual url.QueryEscape is used for compatibility with older +// versions of the Pushgateway and for better readability. +func (p *Pusher) fullURL() string { + urlComponents := []string{} + if encodedJob, base64 := encodeComponent(p.job); base64 { + urlComponents = append(urlComponents, "job"+base64Suffix, encodedJob) + } else { + urlComponents = append(urlComponents, "job", encodedJob) + } + for ln, lv := range p.grouping { + if encodedLV, base64 := encodeComponent(lv); base64 { + urlComponents = append(urlComponents, ln+base64Suffix, encodedLV) + } else { + urlComponents = append(urlComponents, ln, encodedLV) + } + } + return fmt.Sprintf("%s/metrics/%s", p.url, strings.Join(urlComponents, "/")) +} + +// encodeComponent encodes the provided string with base64.RawURLEncoding in +// case it contains '/' and as "=" in case it is empty. If neither is the case, +// it uses url.QueryEscape instead. It returns true in the former two cases. +func encodeComponent(s string) (string, bool) { + if s == "" { + return "=", true + } + if strings.Contains(s, "/") { + return base64.RawURLEncoding.EncodeToString([]byte(s)), true + } + return url.QueryEscape(s), false +} diff --git a/vendor/modules.txt b/vendor/modules.txt index dd9a2887df9..095d6d1cb2d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -252,6 +252,7 @@ github.com/prometheus/client_golang/prometheus/internal github.com/prometheus/client_golang/prometheus/promauto github.com/prometheus/client_golang/prometheus/promhttp github.com/prometheus/client_golang/prometheus/promhttp/internal +github.com/prometheus/client_golang/prometheus/push # github.com/prometheus/client_model v0.6.1 ## explicit; go 1.19 github.com/prometheus/client_model/go