From 0cdef0c53dea9d846430a4ed74fa56dc07d82f57 Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Thu, 18 Jun 2026 16:39:02 -0700 Subject: [PATCH] Add cluster and job name labels to Shared Pathways Service metrics PiperOrigin-RevId: 934608656 --- .../shared_pathways_service/isc_pathways.py | 18 +++++----- .../metrics_collector.py | 34 +++++++++++++++---- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/pathwaysutils/experimental/shared_pathways_service/isc_pathways.py b/pathwaysutils/experimental/shared_pathways_service/isc_pathways.py index 5e12272..f34fcad 100644 --- a/pathwaysutils/experimental/shared_pathways_service/isc_pathways.py +++ b/pathwaysutils/experimental/shared_pathways_service/isc_pathways.py @@ -278,14 +278,16 @@ def __init__( self.proxy_server_image = proxy_server_image self.proxy_options = proxy_options or ProxyOptions() self._old_jax_platforms = None - raw_collector = ( - metrics_collector.MetricsCollector(self.project) - if collect_service_metrics - else None - ) - self.metrics_collector = metrics_collector.SafeMetricsCollector( - raw_collector - ) + if collect_service_metrics: + raw_collector = metrics_collector.MetricsCollector( + self.project, self.cluster, self._proxy_job_name + ) + self.metrics_collector = metrics_collector.SafeMetricsCollector( + raw_collector + ) + else: + self.metrics_collector = metrics_collector.SafeMetricsCollector(None) + self.start_time = None self._old_jax_backend_target = None self._old_jax_platforms_config = None diff --git a/pathwaysutils/experimental/shared_pathways_service/metrics_collector.py b/pathwaysutils/experimental/shared_pathways_service/metrics_collector.py index 9ea0cae..b37caec 100644 --- a/pathwaysutils/experimental/shared_pathways_service/metrics_collector.py +++ b/pathwaysutils/experimental/shared_pathways_service/metrics_collector.py @@ -75,8 +75,15 @@ class MetricsCollector: """Collects usage metrics for Shared Pathways Service and reports to Cloud Monitoring.""" - def __init__(self, project_id: str): + def __init__( + self, + project_id: str, + cluster_name: str, + job_name: str | None = None, + ): self.project_id = project_id + self.cluster_name = cluster_name + self.job_name = job_name self.client = monitoring_v3.MetricServiceClient() self.project_name = f"projects/{self.project_id}" self._lock = threading.Lock() @@ -185,6 +192,9 @@ def _send_metric( ): """Queues a single metric in the buffer.""" default_labels = {"client_instance_id": self._instance_id} + default_labels["cluster_name"] = self.cluster_name + if self.job_name: + default_labels["job_name"] = self.job_name if metric_labels: default_labels.update(metric_labels) _logger.info( @@ -244,11 +254,23 @@ def _create_metric_descriptor( "description": description, "display_name": display_name, "unit": unit, - "labels": [{ - "key": "client_instance_id", - "value_type": "STRING", - "description": "Unique execution identifier", - }], + "labels": [ + { + "key": "client_instance_id", + "value_type": "STRING", + "description": "Unique execution identifier", + }, + { + "key": "cluster_name", + "value_type": "STRING", + "description": "GKE cluster name", + }, + { + "key": "job_name", + "value_type": "STRING", + "description": "Pathways proxy job name", + }, + ], }, ) _logger.info("Created metric descriptor: %s", metric_type)