diff --git a/aws_lambda_powertools/metrics/metrics.py b/aws_lambda_powertools/metrics/metrics.py
index c8c16e8bd51..2ed7b43c35e 100644
--- a/aws_lambda_powertools/metrics/metrics.py
+++ b/aws_lambda_powertools/metrics/metrics.py
@@ -123,6 +123,13 @@ def add_metric(
     def add_dimension(self, name: str, value: str) -> None:
         self.provider.add_dimension(name=name, value=value)
 
+    def add_dimensions(self, **dimensions: str) -> None:
+        """Add a new set of dimensions creating an additional dimension array.
+
+        Creates a new dimension set in the CloudWatch EMF Dimensions array.
+        """
+        self.provider.add_dimensions(**dimensions)
+
     def serialize_metric_set(
         self,
         metrics: dict | None = None,
diff --git a/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py b/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py
index f84e1b0ff42..243fc561593 100644
--- a/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py
+++ b/aws_lambda_powertools/metrics/provider/cloudwatch_emf/cloudwatch.py
@@ -94,6 +94,7 @@ def __init__(
         self.metadata_set = metadata_set if metadata_set is not None else {}
         self.timestamp: int | None = None
+        self.dimension_sets: list[dict[str, str]] = []  # Store multiple dimension sets
 
         self._metric_units = [unit.value for unit in MetricUnit]
         self._metric_unit_valid_options = list(MetricUnit.__members__)
 
@@ -256,21 +257,30 @@ def serialize_metric_set(
 
             metric_names_and_values.update({metric_name: metric_value})
 
+        # Build Dimensions array: primary set + additional dimension sets
+        dimension_arrays: list[list[str]] = [list(dimensions.keys())]
+        all_dimensions: dict[str, str] = dict(dimensions)
+
+        # Add each additional dimension set
+        for dim_set in self.dimension_sets:
+            all_dimensions.update(dim_set)
+            dimension_arrays.append(list(dim_set.keys()))
+
         return {
             "_aws": {
                 "Timestamp": self.timestamp or int(datetime.datetime.now().timestamp() * 1000),  # epoch
                 "CloudWatchMetrics": [
                     {
                         "Namespace": self.namespace,  # "test_namespace"
-                        "Dimensions": [list(dimensions.keys())],  # [ "service" ]
+                        "Dimensions": dimension_arrays,  # [["service"], ["env", "region"]]
                         "Metrics": metric_definition,
                     },
                 ],
             },
             # NOTE: Mypy doesn't recognize splats '** syntax' in TypedDict
-            **dimensions,  # "service": "test_service"
-            **metadata,  # type: ignore[typeddict-item] # "username": "test"
-            **metric_names_and_values,  # "single_metric": 1.0
+            **all_dimensions,  # type: ignore[typeddict-item] # All dimension key-value pairs
+            **metadata,  # type: ignore[typeddict-item]
+            **metric_names_and_values,
         }
 
     def add_dimension(self, name: str, value: str) -> None:
@@ -316,6 +326,70 @@ def add_dimension(self, name: str, value: str) -> None:
 
         self.dimension_set[name] = value
 
+    def add_dimensions(self, **dimensions: str) -> None:
+        """Add a new set of dimensions creating an additional dimension array.
+
+        Creates a new dimension set in the CloudWatch EMF Dimensions array.
+
+        Example
+        -------
+        **Add multiple dimension sets**
+
+            metrics.add_dimensions(environment="prod", region="us-east-1")
+
+        Parameters
+        ----------
+        dimensions : str
+            Dimension key-value pairs as keyword arguments
+        """
+        logger.debug(f"Adding dimension set: {dimensions}")
+
+        if not dimensions:
+            warnings.warn(
+                "Empty dimensions dictionary provided",
+                category=PowertoolsUserWarning,
+                stacklevel=2,
+            )
+            return
+
+        sanitized = self._sanitize_dimensions(dimensions)
+        if not sanitized:
+            return
+
+        self._validate_dimension_limit(sanitized)
+
+        self.dimension_sets.append({**self.default_dimensions, **sanitized})
+
+    def _sanitize_dimensions(self, dimensions: dict[str, str]) -> dict[str, str]:
+        """Convert dimension values to strings and filter out empty ones."""
+        sanitized: dict[str, str] = {}
+
+        for name, value in dimensions.items():
+            str_name = str(name)
+            str_value = str(value)
+
+            if not str_name.strip() or not str_value.strip():
+                warnings.warn(
+                    f"Dimension {str_name} has empty name or value",
+                    category=PowertoolsUserWarning,
+                    stacklevel=2,
+                )
+                continue
+
+            sanitized[str_name] = str_value
+
+        return sanitized
+
+    def _validate_dimension_limit(self, new_dimensions: dict[str, str]) -> None:
+        """Validate that adding new dimensions won't exceed CloudWatch limits."""
+        all_keys = set(self.dimension_set.keys())
+        for ds in self.dimension_sets:
+            all_keys.update(ds.keys())
+        all_keys.update(new_dimensions.keys())
+
+        if len(all_keys) > MAX_DIMENSIONS:
+            raise SchemaValidationError(f"Maximum dimensions ({MAX_DIMENSIONS}) exceeded")
+
     def add_metadata(self, key: str, value: Any) -> None:
         """Adds high cardinal metadata for metrics object
 
@@ -377,6 +451,7 @@ def clear_metrics(self) -> None:
         logger.debug("Clearing out existing metric set from memory")
         self.metric_set.clear()
         self.dimension_set.clear()
+        self.dimension_sets.clear()
         self.metadata_set.clear()
         self.set_default_dimensions(**self.default_dimensions)
 
diff --git a/docs/core/metrics.md b/docs/core/metrics.md
index 7d3dd46509d..830bc832e4f 100644
--- a/docs/core/metrics.md
+++ b/docs/core/metrics.md
@@ -22,7 +22,7 @@ If you're new to Amazon CloudWatch, there are five terminologies you must be awa
 * **Dimensions**. Metrics metadata in key-value format. They help you slice and dice metrics visualization, for example `ColdStart` metric by Payment `service`.
 * **Metric**. It's the name of the metric, for example: `SuccessfulBooking` or `UpdatedBooking`.
 * **Unit**. It's a value representing the unit of measure for the corresponding metric, for example: `Count` or `Seconds`.
-* **Resolution**. It's a value representing the storage resolution for the corresponding metric. Metrics can be either Standard or High resolution. Read more [here](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html#high-resolution-metrics){target="_blank"}.
+* **Resolution**. It's a value representing the storage resolution for the corresponding metric. Metrics can be either Standard or High resolution. Read more in the [high-resolution metrics documentation](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/publishingMetrics.html#high-resolution-metrics){target="_blank"}.
 
 Terminology
 
@@ -136,6 +136,27 @@ If you'd like to remove them at some point, you can use `clear_default_dimension
 
 **Note:** Dimensions with empty values will not be included.
 
+### Adding multiple dimension sets
+
+You can use the `add_dimensions` method to create multiple dimension sets in a single EMF blob. This allows you to aggregate metrics across different dimension combinations without emitting separate metric blobs.
+
+Each call to `add_dimensions` creates a new dimension array in the CloudWatch EMF output, enabling different views of the same metric data.
+
+=== "add_dimensions.py"
+
+    ```python hl_lines="12-13"
+    --8<-- "examples/metrics/src/add_dimensions.py"
+    ```
+
+=== "add_dimensions_output.json"
+
+    ```json hl_lines="8-12"
+    --8<-- "examples/metrics/src/add_dimensions_output.json"
+    ```
+
+???+ tip "When to use multiple dimension sets"
+    Use `add_dimensions` when you need to query the same metric with different dimension combinations. For example, you might want to see `SuccessfulBooking` aggregated by `environment` alone, or by both `environment` and `region`.
+
 ### Changing default timestamp
 
 When creating metrics, we use the current timestamp. If you want to change the timestamp of all the metrics you create, utilize the `set_timestamp` function. You can specify a datetime object or an integer representing an epoch timestamp in milliseconds.
@@ -233,12 +254,12 @@ The priority of the `function_name` dimension value is defined as:
 
 The following environment variable is available to configure Metrics at a global scope:
 
-| Setting            | Description                                                   | Environment variable               | Default |
-| ------------------ | ------------------------------------------------------------ | ---------------------------------- | ------- |
-| **Namespace Name** | Sets **namespace** used for metrics.                          | `POWERTOOLS_METRICS_NAMESPACE`     | `None`  |
-| **Service**        | Sets **service** metric dimension across all metrics e.g. `payment` | `POWERTOOLS_SERVICE_NAME`    | `None`  |
-| **Function Name**  | Function name used as dimension for the **ColdStart** metric. | `POWERTOOLS_METRICS_FUNCTION_NAME` | `None`  |
-| **Disable Powertools Metrics** | **Disables** all metrics emitted by Powertools.   | `POWERTOOLS_METRICS_DISABLED`      | `None`  |
+| Setting                        | Description                                                          | Environment variable               | Default |
+| ------------------------------ | -------------------------------------------------------------------- | ---------------------------------- | ------- |
+| **Namespace Name**             | Sets **namespace** used for metrics.                                  | `POWERTOOLS_METRICS_NAMESPACE`     | `None`  |
+| **Service**                    | Sets **service** metric dimension across all metrics e.g. `payment`  | `POWERTOOLS_SERVICE_NAME`          | `None`  |
+| **Function Name**              | Function name used as dimension for the **ColdStart** metric.        | `POWERTOOLS_METRICS_FUNCTION_NAME` | `None`  |
+| **Disable Powertools Metrics** | **Disables** all metrics emitted by Powertools.                       | `POWERTOOLS_METRICS_DISABLED`      | `None`  |
 
 `POWERTOOLS_METRICS_NAMESPACE` is also available on a per-instance basis with the `namespace` parameter, which will consequently override the environment variable value.
 
@@ -393,8 +414,8 @@ We provide a thin-wrapper on top of the most requested observability providers.
 
 Current providers:
 
-| Provider                               | Notes                                                     |
-| -------------------------------------- | --------------------------------------------------------- |
+| Provider                                  | Notes                                                     |
+| ----------------------------------------- | --------------------------------------------------------- |
 | [Datadog](./datadog.md){target="_blank"}  | Uses Datadog SDK and Datadog Lambda Extension by default  |
 
 ## Testing your code
diff --git a/examples/metrics/src/add_dimensions.py b/examples/metrics/src/add_dimensions.py
new file mode 100644
index 00000000000..4bf99ba7ce5
--- /dev/null
+++ b/examples/metrics/src/add_dimensions.py
@@ -0,0 +1,17 @@
+from aws_lambda_powertools import Metrics
+from aws_lambda_powertools.metrics import MetricUnit
+from aws_lambda_powertools.utilities.typing import LambdaContext
+
+metrics = Metrics()
+
+
+@metrics.log_metrics
+def lambda_handler(event: dict, context: LambdaContext):
+    # Add primary dimension
+    metrics.add_dimension(name="service", value="booking")
+
+    # Add multiple dimension sets for different aggregation views
+    metrics.add_dimensions(environment="prod", region="us-east-1")
+    metrics.add_dimensions(environment="prod")
+
+    metrics.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1)
diff --git a/examples/metrics/src/add_dimensions_output.json b/examples/metrics/src/add_dimensions_output.json
new file mode 100644
index 00000000000..26a22ba3466
--- /dev/null
+++ b/examples/metrics/src/add_dimensions_output.json
@@ -0,0 +1,25 @@
+{
+  "_aws": {
+    "Timestamp": 1656620400000,
+    "CloudWatchMetrics": [
+      {
+        "Namespace": "ServerlessAirline",
+        "Dimensions": [
+          ["service"],
+          ["environment", "region"],
+          ["environment"]
+        ],
+        "Metrics": [
+          {
+            "Name": "SuccessfulBooking",
+            "Unit": "Count"
+          }
+        ]
+      }
+    ]
+  },
+  "service": "booking",
+  "environment": "prod",
+  "region": "us-east-1",
+  "SuccessfulBooking": [1.0]
+}
diff --git a/tests/functional/metrics/test_dimension_sets.py b/tests/functional/metrics/test_dimension_sets.py
new file mode 100644
index 00000000000..fe3b86c3578
--- /dev/null
+++ b/tests/functional/metrics/test_dimension_sets.py
@@ -0,0 +1,265 @@
+"""
+Tests for multiple dimension sets feature
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from aws_lambda_powertools.metrics import Metrics, MetricUnit, SchemaValidationError
+from aws_lambda_powertools.metrics.provider.cloudwatch_emf.cloudwatch import AmazonCloudWatchEMFProvider
+
+
+def test_add_dimensions_creates_multiple_dimension_sets(capsys):
+    # GIVEN a metrics instance
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # WHEN we add multiple dimension sets
+    metrics.add_dimension(name="service", value="booking")
+    metrics.add_dimensions(environment="prod", region="us-east-1")
+    metrics.add_dimensions(environment="prod")
+    metrics.add_dimensions(region="us-east-1")
+    metrics.add_metric(name="SuccessfulRequests", unit=MetricUnit.Count, value=10)
+
+    # THEN the serialized output should contain multiple dimension arrays
+    output = metrics.serialize_metric_set()
+
+    assert len(output["_aws"]["CloudWatchMetrics"]) == 1
+    dimensions = output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]
+
+    # Should have 4 dimension sets: primary + 3 added
+    assert len(dimensions) == 4
+    assert dimensions[0] == ["service"]  # Primary dimension set
+    assert set(dimensions[1]) == {"environment", "region"}
+    assert dimensions[2] == ["environment"]
+    assert dimensions[3] == ["region"]
+
+    # All dimension values should be in the root
+    assert output["service"] == "booking"
+    assert output["environment"] == "prod"
+    assert output["region"] == "us-east-1"
+    assert output["SuccessfulRequests"] == [10.0]
+
+
+def test_add_dimensions_with_metrics_wrapper(capsys):
+    # GIVEN a Metrics instance (not provider directly)
+    metrics = Metrics(namespace="TestApp", service="payment")
+
+    # WHEN we use add_dimensions through the Metrics wrapper
+    @metrics.log_metrics
+    def handler(event, context):
+        metrics.add_dimensions(environment="staging", region="us-west-2")
+        metrics.add_dimensions(environment="staging")
+        metrics.add_metric(name="PaymentProcessed", unit=MetricUnit.Count, value=1)
+
+    handler({}, {})
+
+    # THEN the output should contain multiple dimension sets
+    output = json.loads(capsys.readouterr().out.strip())
+
+    dimensions = output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]
+    assert len(dimensions) == 3  # Primary (service) + 2 added
+
+    # Primary dimension from service parameter
+    assert "service" in dimensions[0]
+
+    # Check added dimension sets - they don't include service unless it's a default dimension
+    assert set(dimensions[1]) == {"environment", "region"}
+    assert set(dimensions[2]) == {"environment"}
+
+
+def test_add_dimensions_with_default_dimensions():
+    # GIVEN metrics with default dimensions
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+    metrics.set_default_dimensions(tenant_id="123", application="api")
+
+    # WHEN we add dimension sets after setting defaults
+    metrics.add_dimensions(environment="prod")
+    metrics.add_dimensions(region="eu-west-1")
+    metrics.add_metric(name="ApiCalls", unit=MetricUnit.Count, value=5)
+
+    # THEN default dimensions should be included in all dimension sets
+    output = metrics.serialize_metric_set()
+    dimensions = output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]
+
+    # Each dimension set should include default dimensions
+    assert set(dimensions[1]) == {"tenant_id", "application", "environment"}
+    assert set(dimensions[2]) == {"tenant_id", "application", "region"}
+
+    # Values should be in root
+    assert output["tenant_id"] == "123"
+    assert output["application"] == "api"
+    assert output["environment"] == "prod"
+    assert output["region"] == "eu-west-1"
+
+
+def test_add_dimensions_duplicate_keys_last_value_wins():
+    # GIVEN metrics with overlapping dimension keys
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # WHEN we add dimension sets with duplicate keys
+    metrics.add_dimensions(environment="dev", region="us-east-1")
+    metrics.add_dimensions(environment="staging", region="us-west-2")
+    metrics.add_dimensions(environment="prod")  # Last value for environment
+    metrics.add_metric(name="TestMetric", unit=MetricUnit.Count, value=1)
+
+    # THEN the last value should be used in the root
+    output = metrics.serialize_metric_set()
+
+    # Last values should win
+    assert output["environment"] == "prod"
+    assert output["region"] == "us-west-2"
+
+
+def test_add_dimensions_empty_kwargs_warns():
+    # GIVEN metrics instance
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # WHEN we call add_dimensions without arguments
+    with pytest.warns(UserWarning, match="Empty dimensions dictionary"):
+        metrics.add_dimensions()
+
+    # THEN no dimension set should be added
+    assert len(metrics.dimension_sets) == 0
+
+
+def test_add_dimensions_invalid_dimensions_skipped():
+    # GIVEN metrics instance
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # WHEN we add dimensions with empty values
+    with pytest.warns(UserWarning, match="empty name or value"):
+        metrics.add_dimensions(key="")
+
+    # THEN no dimension set should be added
+    assert len(metrics.dimension_sets) == 0
+
+
+def test_add_dimensions_exceeds_max_dimensions():
+    # GIVEN metrics with many dimensions
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # Add 29 dimensions to primary set (max is 30)
+    for i in range(29):
+        metrics.add_dimension(name=f"dim{i}", value=f"val{i}")
+
+    # WHEN we try to add dimension set that would exceed max
+    # THEN it should raise SchemaValidationError
+    with pytest.raises(SchemaValidationError, match="Maximum dimensions"):
+        metrics.add_dimensions(extra1="val1", extra2="val2")
+
+
+def test_add_dimensions_converts_values_to_strings():
+    # GIVEN metrics instance
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # WHEN we add dimensions with non-string values (using **dict for non-string values)
+    metrics.add_dimensions(**{"count": 123, "is_active": True, "ratio": 3.14})
+    metrics.add_metric(name="TestMetric", unit=MetricUnit.Count, value=1)
+
+    # THEN values should be converted to strings
+    output = metrics.serialize_metric_set()
+    assert output["count"] == "123"
+    assert output["is_active"] == "True"
+    assert output["ratio"] == "3.14"
+
+
+def test_clear_metrics_clears_dimension_sets(capsys):
+    # GIVEN metrics with dimension sets
+    metrics = Metrics(namespace="TestApp", service="api")
+
+    @metrics.log_metrics
+    def handler(event, context):
+        metrics.add_dimensions(environment="prod")
+        metrics.add_dimensions(region="us-east-1")
+        metrics.add_metric(name="Requests", unit=MetricUnit.Count, value=1)
+
+    handler({}, {})
+
+    # WHEN we call clear_metrics (done automatically by decorator)
+    # THEN dimension_sets should be cleared
+    assert len(metrics.provider.dimension_sets) == 0
+
+
+def test_add_dimensions_order_preserved():
+    # GIVEN metrics instance
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # WHEN we add dimension sets in specific order
+    metrics.add_dimension(name="service", value="api")
+    metrics.add_dimensions(environment="prod", region="us-east-1")
+    metrics.add_dimensions(environment="prod")
+    metrics.add_dimensions(region="us-east-1")
+    metrics.add_metric(name="TestMetric", unit=MetricUnit.Count, value=1)
+
+    # THEN dimension sets should appear in order added
+    output = metrics.serialize_metric_set()
+    dimensions = output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]
+
+    assert dimensions[0] == ["service"]
+    assert set(dimensions[1]) == {"environment", "region"}
+    assert dimensions[2] == ["environment"]
+    assert dimensions[3] == ["region"]
+
+
+def test_add_dimensions_with_metadata():
+    # GIVEN metrics with metadata
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # WHEN we add dimension sets and metadata
+    metrics.add_dimensions(environment="prod")
+    metrics.add_metadata(key="request_id", value="abc-123")
+    metrics.add_metric(name="ApiLatency", unit=MetricUnit.Milliseconds, value=150)
+
+    # THEN both should be in output
+    output = metrics.serialize_metric_set()
+
+    assert "environment" in output
+    assert output["request_id"] == "abc-123"
+    # Primary dimension_set + 1 additional dimension set
+    assert len(output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]) == 2
+
+
+def test_multiple_metrics_with_dimension_sets():
+    # GIVEN metrics with multiple metrics and dimension sets
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # WHEN we add multiple metrics with dimension sets
+    metrics.add_dimensions(environment="prod", region="us-east-1")
+    metrics.add_dimensions(environment="prod")
+    metrics.add_metric(name="SuccessCount", unit=MetricUnit.Count, value=100)
+    metrics.add_metric(name="ErrorCount", unit=MetricUnit.Count, value=5)
+    metrics.add_metric(name="Latency", unit=MetricUnit.Milliseconds, value=250)
+
+    # THEN all metrics should share the same dimension sets
+    output = metrics.serialize_metric_set()
+
+    assert len(output["_aws"]["CloudWatchMetrics"][0]["Metrics"]) == 3
+    # Primary (empty) + 2 added dimension sets
+    assert len(output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]) == 3
+    assert output["SuccessCount"] == [100.0]
+    assert output["ErrorCount"] == [5.0]
+    assert output["Latency"] == [250.0]
+
+
+def test_add_dimensions_with_high_resolution_metrics():
+    # GIVEN metrics with high resolution
+    metrics = AmazonCloudWatchEMFProvider(namespace="TestApp")
+
+    # WHEN we add dimension sets with high-resolution metrics
+    metrics.add_dimensions(function="process_order")
+    metrics.add_metric(
+        name="ProcessingTime",
+        unit=MetricUnit.Milliseconds,
+        value=45,
+        resolution=1,  # High resolution
+    )
+
+    # THEN dimension sets should work with high-resolution metrics
+    output = metrics.serialize_metric_set()
+
+    # Primary (empty) + 1 added dimension set
+    assert len(output["_aws"]["CloudWatchMetrics"][0]["Dimensions"]) == 2
+    assert output["_aws"]["CloudWatchMetrics"][0]["Metrics"][0]["StorageResolution"] == 1
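
Reviewer note, not part of the patch: a minimal local sanity check for the new `add_dimensions` API, assuming the diff above is applied. It only uses classes and calls that appear in the patch (`AmazonCloudWatchEMFProvider`, `add_dimension`, `add_dimensions`, `add_metric`, `serialize_metric_set`) and prints the resulting EMF `Dimensions` arrays so the multi-set output can be inspected without deploying a function.

# Sketch only; assumes the patched provider module is importable exactly as in the new tests.
from aws_lambda_powertools.metrics import MetricUnit
from aws_lambda_powertools.metrics.provider.cloudwatch_emf.cloudwatch import AmazonCloudWatchEMFProvider

provider = AmazonCloudWatchEMFProvider(namespace="ServerlessAirline")
provider.add_dimension(name="service", value="booking")

# Each call below should append one extra array to the EMF "Dimensions" field
provider.add_dimensions(environment="prod", region="us-east-1")
provider.add_dimensions(environment="prod")

provider.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1)

# Expected: [["service"], ["environment", "region"], ["environment"]]
print(provider.serialize_metric_set()["_aws"]["CloudWatchMetrics"][0]["Dimensions"])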