From ef9fd1f80706f8e954eafaec73427fb24f25dc6e Mon Sep 17 00:00:00 2001 From: Rodrigo Brandao Date: Thu, 26 Mar 2026 11:31:27 -0700 Subject: [PATCH 1/3] Adding OTEL test sample --- .../activity/otel/__init__.py | 30 +++ .../hosting/core/app/agent_application.py | 36 ++- .../hosting/core/telemetry/__init__.py | 40 +++ .../hosting/core/telemetry/attributes.py | 50 ++++ .../hosting/core/telemetry/core/__init__.py | 21 ++ .../core/telemetry/core/_agents_telemetry.py | 106 ++++++++ .../core/telemetry/core/base_span_wrapper.py | 79 ++++++ .../hosting/core/telemetry/core/resource.py | 18 ++ .../telemetry/core/simple_span_wrapper.py | 40 +++ .../hosting/core/telemetry/core/type_defs.py | 7 + .../hosting/core/telemetry/utils.py | 28 ++ .../microsoft-agents-hosting-core/setup.py | 2 + test_samples/otel/env.TEMPLATE | 16 ++ test_samples/otel/requirements.txt | 14 + test_samples/otel/src/__init__.py | 0 test_samples/otel/src/agent.py | 86 ++++++ test_samples/otel/src/card.py | 73 +++++ test_samples/otel/src/get_user_info.py | 18 ++ test_samples/otel/src/main.py | 14 + test_samples/otel/src/start_server.py | 40 +++ test_samples/otel/src/telemetry.py | 107 ++++++++ test_samples/otel/start_dashboard.ps1 | 1 + .../_tests/test_delta_metric_reader.py | 212 +++++++++++++++ tests/_common/fixtures/telemetry.py | 169 ++++++++++++ tests/_common/telemetry_utils.py | 38 +++ tests/hosting_core/telemetry/__init__.py | 0 .../telemetry/test_agents_telemetry.py | 109 ++++++++ .../telemetry/test_simple_span_wrapper.py | 252 ++++++++++++++++++ tests/hosting_core/telemetry/test_utils.py | 72 +++++ 29 files changed, 1666 insertions(+), 12 deletions(-) create mode 100644 libraries/microsoft-agents-activity/microsoft_agents/activity/otel/__init__.py create mode 100644 libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/__init__.py create mode 100644 libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/attributes.py create mode 100644 libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/__init__.py create mode 100644 libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/_agents_telemetry.py create mode 100644 libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/base_span_wrapper.py create mode 100644 libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/resource.py create mode 100644 libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/simple_span_wrapper.py create mode 100644 libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/type_defs.py create mode 100644 libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/utils.py create mode 100644 test_samples/otel/env.TEMPLATE create mode 100644 test_samples/otel/requirements.txt create mode 100644 test_samples/otel/src/__init__.py create mode 100644 test_samples/otel/src/agent.py create mode 100644 test_samples/otel/src/card.py create mode 100644 test_samples/otel/src/get_user_info.py create mode 100644 test_samples/otel/src/main.py create mode 100644 test_samples/otel/src/start_server.py create mode 100644 test_samples/otel/src/telemetry.py create mode 100644 test_samples/otel/start_dashboard.ps1 create mode 100644 tests/_common/_tests/test_delta_metric_reader.py create mode 100644 tests/_common/fixtures/telemetry.py create mode 100644 tests/_common/telemetry_utils.py create mode 100644 tests/hosting_core/telemetry/__init__.py create mode 100644 tests/hosting_core/telemetry/test_agents_telemetry.py create mode 100644 tests/hosting_core/telemetry/test_simple_span_wrapper.py create mode 100644 tests/hosting_core/telemetry/test_utils.py diff --git a/libraries/microsoft-agents-activity/microsoft_agents/activity/otel/__init__.py b/libraries/microsoft-agents-activity/microsoft_agents/activity/otel/__init__.py new file mode 100644 index 00000000..329e7b13 --- /dev/null +++ b/libraries/microsoft-agents-activity/microsoft_agents/activity/otel/__init__.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +import functools +import logging +import types +from typing import Any, Collection, Literal + + +class AgentMetrics: + + tracer: Tracer + + +class AgentsSDKInstrumentor(BaseInstrumentor): + """An OpenTelemetry Instrumentor for the Microsoft Agents SDK.""" + + def _instrument(self, **kwargs: Any) -> None: + """Instruments the Microsoft Agents SDK.""" + try: + import microsoft_agents.activity.otel._agent_activity_tracing + except ImportError as exc: + logging.warning( + "Failed to import Microsoft Agents SDK instrumentation module: %s", + exc, + ) + + def _uninstrument(self, **kwargs: Any) -> None: + """Uninstruments the Microsoft Agents SDK.""" + # No uninstrumentation logic needed for this instrumentation + pass diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/app/agent_application.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/app/agent_application.py index d0eb6c1e..60875a0e 100644 --- a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/app/agent_application.py +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/app/agent_application.py @@ -120,9 +120,11 @@ def __init__( "ApplicationOptions.storage is required and was not configured.", stack_info=True, ) - raise ApplicationError(""" + raise ApplicationError( + """ The `ApplicationOptions.storage` property is required and was not configured. - """) + """ + ) if options.long_running_messages and ( not options.adapter or not options.bot_app_id @@ -131,10 +133,12 @@ def __init__( "ApplicationOptions.long_running_messages requires an adapter and bot_app_id.", stack_info=True, ) - raise ApplicationError(""" + raise ApplicationError( + """ The `ApplicationOptions.long_running_messages` property is unavailable because no adapter or `bot_app_id` was configured. - """) + """ + ) if options.adapter: self._adapter = options.adapter @@ -176,10 +180,12 @@ def adapter(self) -> ChannelServiceAdapter: "AgentApplication.adapter(): self._adapter is not configured.", stack_info=True, ) - raise ApplicationError(""" + raise ApplicationError( + """ The AgentApplication.adapter property is unavailable because it was not configured when creating the AgentApplication. - """) + """ + ) return self._adapter @@ -197,10 +203,12 @@ def auth(self) -> Authorization: "AgentApplication.auth(): self._auth is not configured.", stack_info=True, ) - raise ApplicationError(""" + raise ApplicationError( + """ The `AgentApplication.auth` property is unavailable because no Auth options were configured. - """) + """ + ) return self._auth @@ -584,10 +592,12 @@ async def sign_in_success(context: TurnContext, state: TurnState, connection_id: f"Failed to register sign-in success handler for route handler {func.__name__}", stack_info=True, ) - raise ApplicationError(""" + raise ApplicationError( + """ The `AgentApplication.on_sign_in_success` method is unavailable because no Auth options were configured. - """) + """ + ) return func def on_sign_in_failure( @@ -618,10 +628,12 @@ async def sign_in_failure(context: TurnContext, state: TurnState, connection_id: f"Failed to register sign-in failure handler for route handler {func.__name__}", stack_info=True, ) - raise ApplicationError(""" + raise ApplicationError( + """ The `AgentApplication.on_sign_in_failure` method is unavailable because no Auth options were configured. - """) + """ + ) return func def error( diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/__init__.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/__init__.py new file mode 100644 index 00000000..b054f57c --- /dev/null +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/__init__.py @@ -0,0 +1,40 @@ +## DESIGN +# This design is similar to how error codes are implemented and maintained. +# The alternative was to inject all of this telemetry logic inline with the code it instruments. +# While some spans are simple, others require more involved mapping of attributes or +# even emitting metrics. +# +# This design hides the "mess" of telemetry to one location rather than throughout the codebase. +# +# NOTE: this module should not be auto-loaded from __init__.py in order to avoid + +from . import attributes +from .core import ( + agents_telemetry, + SERVICE_NAME, + SERVICE_VERSION, + RESOURCE, + AttributeMap, + BaseSpanWrapper, + SimpleSpanWrapper, +) + +from .utils import ( + format_scopes, + get_conversation_id, + get_delivery_mode, +) + +__all__ = [ + "attributes", + "agents_telemetry", + "format_scopes", + "get_conversation_id", + "get_delivery_mode", + "AttributeMap", + "BaseSpanWrapper", + "SimpleSpanWrapper", + "SERVICE_NAME", + "SERVICE_VERSION", + "RESOURCE", +] diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/attributes.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/attributes.py new file mode 100644 index 00000000..379a443a --- /dev/null +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/attributes.py @@ -0,0 +1,50 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +ACTIVITY_DELIVERY_MODE = "activity.delivery_mode" +ACTIVITY_CHANNEL_ID = "activity.channel_id" +ACTIVITY_ID = "activity.id" +ACTIVITY_COUNT = "activities.count" +ACTIVITY_TYPE = "activity.type" + +AGENTIC_USER_ID = "agentic.user_id" +AGENTIC_INSTANCE_ID = "agentic.instance_id" + +APP_ID = "agent.app_id" + +ATTACHMENT_ID = "activity.attachment.id" +ATTACHMENT_COUNT = "activity.attachments.count" + +AUTH_HANDLER_ID = "auth.handler.id" +AUTH_METHOD = "auth.method" +AUTH_SCOPES = "auth.scopes" +AUTH_SUCCESS = "auth.success" + +CONNECTION_NAME = "auth.connection.name" +CONVERSATION_ID = "activity.conversation.id" + +HTTP_METHOD = "http.method" +HTTP_STATUS_CODE = "http.status_code" + +IS_AGENTIC = "is_agentic_request" + +KEY_COUNT = "storage.keys.count" + +OPERATION = "operation" + +ROUTE_AUTHORIZED = "route.authorized" +ROUTE_IS_INVOKE = "route.is_invoke" +ROUTE_IS_AGENTIC = "route.is_agentic" +ROUTE_MATCHED = "route.matched" + +SERVICE_URL = "service_url" +STORAGE_OPERATION = "storage.operation" + +TOKEN_SERVICE_ENDPOINT = "agents.token_service.endpoint" + +USER_ID = "user.id" + +VIEW_ID = "view.id" + +# for missing values +UNKNOWN = "unknown" diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/__init__.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/__init__.py new file mode 100644 index 00000000..71b29dcd --- /dev/null +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from . import resource +from ._agents_telemetry import agents_telemetry +from .type_defs import AttributeMap, SpanCallback +from .simple_span_wrapper import SimpleSpanWrapper +from .base_span_wrapper import BaseSpanWrapper +from .resource import SERVICE_NAME, SERVICE_VERSION, RESOURCE + +__all__ = [ + "agents_telemetry", + "resource", + "AttributeMap", + "SpanCallback", + "SimpleSpanWrapper", + "BaseSpanWrapper", + "SERVICE_NAME", + "SERVICE_VERSION", + "RESOURCE", +] diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/_agents_telemetry.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/_agents_telemetry.py new file mode 100644 index 00000000..34c13ec4 --- /dev/null +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/_agents_telemetry.py @@ -0,0 +1,106 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import time +import logging +from collections.abc import Iterator + +from contextlib import contextmanager + +from opentelemetry.metrics import Meter +from opentelemetry import metrics, trace +from opentelemetry.trace import Tracer, Span + +from microsoft_agents.activity import TurnContextProtocol + +from .resource import SERVICE_NAME, SERVICE_VERSION +from .type_defs import SpanCallback + +logger = logging.getLogger(__name__) + + +class _AgentsTelemetry: + + def __init__(self): + """Initializes the AgentsTelemetry instance with the given tracer and meter, or creates new ones if not provided + + :param tracer: Optional OpenTelemetry Tracer instance to use for creating spans. If not provided, a new tracer will be created with the service name and version from constants. + :param meter: Optional OpenTelemetry Meter instance to use for recording metrics. If not provided, a new meter will be created with the service name and version from constants. + """ + self._tracer = trace.get_tracer(SERVICE_NAME, SERVICE_VERSION) + self._meter = metrics.get_meter(SERVICE_NAME, SERVICE_VERSION) + + @property + def tracer(self) -> Tracer: + """Returns the OpenTelemetry tracer instance for creating spans""" + return self._tracer + + @property + def meter(self) -> Meter: + """Returns the OpenTelemetry meter instance for recording metrics""" + return self._meter + + def _extract_attributes_from_context( + self, turn_context: TurnContextProtocol + ) -> dict: + """Helper method to extract common attributes from the TurnContext for span and metric recording""" + + # This can be expanded to extract common attributes for spans and metrics from the context + attributes = {} + attributes["activity.type"] = turn_context.activity.type + attributes["agent.is_agentic"] = turn_context.activity.is_agentic_request() + if turn_context.activity.from_property: + attributes["from.id"] = turn_context.activity.from_property.id + if turn_context.activity.recipient: + attributes["recipient.id"] = turn_context.activity.recipient.id + if turn_context.activity.conversation: + attributes["conversation.id"] = turn_context.activity.conversation.id + attributes["channel_id"] = turn_context.activity.channel_id + attributes["message.text.length"] = ( + len(turn_context.activity.text) if turn_context.activity.text else 0 + ) + return attributes + + @contextmanager + def start_as_current_span( + self, + span_name: str, + callback: SpanCallback | None = None, + ) -> Iterator[Span]: + """Context manager for starting a timed span that records duration and success/failure status, and invokes a callback with the results + + :param span_name: The name of the span to start + :param callback: Optional callback function that will be called with the span, duration in milliseconds, and any exception that was raised (or None if successful) when the span is ended + :return: An iterator that yields the started span, which will be ended when the context manager exits + """ + + with self._tracer.start_as_current_span(span_name) as span: + + start = time.time() + exception: Exception | None = None + + try: + yield span # execute the operation in the with block + except Exception as e: + exception = e + raise + finally: + + success = exception is None + + end = time.time() + duration = (end - start) * 1000 # milliseconds + + if success: + span.add_event(f"{span_name} completed", {"duration_ms": duration}) + span.set_status(trace.Status(trace.StatusCode.OK)) + if callback: + callback(span, duration, None) + else: + if callback: + callback(span, duration, exception) + + span.set_status(trace.Status(trace.StatusCode.ERROR)) + + +agents_telemetry = _AgentsTelemetry() diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/base_span_wrapper.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/base_span_wrapper.py new file mode 100644 index 00000000..841275e9 --- /dev/null +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/base_span_wrapper.py @@ -0,0 +1,79 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from __future__ import annotations + +import logging + +from abc import ABC, abstractmethod +from contextlib import ExitStack +from typing import ContextManager + +from opentelemetry.trace import Span + +logger = logging.getLogger(__name__) + + +class BaseSpanWrapper(ABC): + """Wrapper around OTEL spans for SDK-specific telemetry""" + + def __init__(self): + self._span: Span | None = None + self._active: bool = False + + self._exit_stack = ExitStack() + + @property + def otel_span(self) -> Span | None: + """Returns the underlying OTEL span if it is active, or None if the span has not been started or has already ended. This can be used to access OTEL-specific functionality or attributes of the span when needed, while still providing a higher-level abstraction through the BaseSpanWrapper class.""" + return self._span + + @property + def active(self) -> bool: + """Indicates whether the BaseSpanWrapper is currently active. This can be used to prevent operations on an inactive BaseSpanWrapper, and to check the BaseSpanWrapper's lifecycle state.""" + return self._active + + @abstractmethod + def _start_span(self) -> ContextManager[Span]: + """Abstract method that must be implemented by subclasses to define how the BaseSpanWrapper is started and what attributes are set on the BaseSpanWrapper. This method should return a context manager that yields the started BaseSpanWrapper, allowing the base BaseSpanWrapper class to manage the BaseSpanWrapper's lifecycle and ensure proper cleanup when the BaseSpanWrapper is ended.""" + raise NotImplementedError + + @staticmethod + def _log_lifespan_error(desc: str) -> None: + """Helper method to log a warning when an operation is attempted on an inactive BaseSpanWrapper. This can be used in methods that require an active BaseSpanWrapper to indicate potential misuse of the BaseSpanWrapper lifecycle.""" + logger.warning( + "Attempting to perform an operation on an inactive BaseSpanWrapper. This may indicate a bug in the telemetry implementation or misuse of the BaseSpanWrapper lifecycle." + ) + logger.warning("Description: %s", desc) + + # TODO -> Add Self annotation once 3.11 is the minimum supported version + def __enter__(self): + """Starts the BaseSpanWrapper and returns the BaseSpanWrapper instance for chaining. This method should check if the BaseSpanWrapper is already active and log a warning if an attempt is made to start an already active BaseSpanWrapper, to help identify potential issues with BaseSpanWrapper lifecycle management.""" + if self._active: + BaseSpanWrapper._log_lifespan_error( + "Attempting to start a BaseSpanWrapper that is already active." + ) + + self._span = self._exit_stack.enter_context(self._start_span()) + self._active = True + + return self + + def start(self) -> BaseSpanWrapper: + """Starts the BaseSpanWrapper and returns the BaseSpanWrapper instance for chaining""" + return self.__enter__() + + def __exit__(self, exc_type, exc_val, exc_tb): + """Stops the BaseSpanWrapper if it is active, and logs a warning if an attempt is made to stop a BaseSpanWrapper that is not active. This ensures that BaseSpanWrappers are properly cleaned up and that potential issues with BaseSpanWrapper lifecycle management are logged for debugging purposes.""" + if self._active: + self._exit_stack.__exit__(exc_type, exc_val, exc_tb) + self._span = None + self._active = False + else: + BaseSpanWrapper._log_lifespan_error( + "BaseSpanWrapper is not active and cannot be exited" + ) + + def end(self) -> None: + """Stops the BaseSpanWrapper if it is active""" + self.__exit__(None, None, None) diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/resource.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/resource.py new file mode 100644 index 00000000..e93fd464 --- /dev/null +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/resource.py @@ -0,0 +1,18 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import os + +from opentelemetry.sdk.resources import Resource + +SERVICE_NAME = "microsoft_agents" +SERVICE_VERSION = "1.0.0" + +RESOURCE = Resource.create( + { + "service.name": SERVICE_NAME, + "service.version": SERVICE_VERSION, + "service.instance.id": os.getenv("HOSTNAME", "unknown"), + "telemetry.sdk.language": "python", + } +) diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/simple_span_wrapper.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/simple_span_wrapper.py new file mode 100644 index 00000000..a7a2eb48 --- /dev/null +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/simple_span_wrapper.py @@ -0,0 +1,40 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from abc import ABC +from collections.abc import Iterator +from contextlib import contextmanager + +from opentelemetry.trace import Span + +from ._agents_telemetry import agents_telemetry +from .base_span_wrapper import BaseSpanWrapper +from .type_defs import AttributeMap + + +class SimpleSpanWrapper(BaseSpanWrapper, ABC): + """Simple implementation of the BaseSpanWrapper that can be used when no additional attributes or functionality are needed on the span beyond what is provided by the base BaseSpanWrapper class. This can be used as a simple wrapper around an OTEL span for cases where no SDK-specific telemetry is needed, while still providing the benefits of the BaseSpanWrapper abstraction and lifecycle management.""" + + def __init__(self, span_name: str): + super().__init__() + self._span_name = span_name + + def _get_attributes(self) -> AttributeMap: + """Returns a dictionary of attributes to set on the span when it is started. This can be overridden by subclasses to provide custom attributes for the span based on the context in which it is being used.""" + return {} + + def _callback(self, span: Span, duration: float, error: Exception | None) -> None: + """Callback function that is called when the span is ended. This can be overridden by subclasses to provide custom logic for recording metrics or handling errors based on the outcome of the span.""" + pass + + @contextmanager + def _start_span(self) -> Iterator[Span]: + """Starts a basic OTEL span with the given name and no additional attributes.""" + with agents_telemetry.start_as_current_span( + self._span_name, callback=self._callback + ) as span: + yield span + if span is not None: + attributes = self._get_attributes() + if attributes: + span.set_attributes(attributes) diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/type_defs.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/type_defs.py new file mode 100644 index 00000000..0169e73f --- /dev/null +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/core/type_defs.py @@ -0,0 +1,7 @@ +from typing import Mapping, Callable + +from opentelemetry.util.types import AttributeValue +from opentelemetry.trace import Span + +AttributeMap = Mapping[str, AttributeValue] +SpanCallback = Callable[[Span, float, Exception | None], None] diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/utils.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/utils.py new file mode 100644 index 00000000..693cb7bc --- /dev/null +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/telemetry/utils.py @@ -0,0 +1,28 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from microsoft_agents.activity import Activity, DeliveryModes + +from .attributes import UNKNOWN + + +def format_scopes(scopes: list[str] | None) -> str: + """Formats a list of scopes into a string for telemetry recording. If the list is None or empty, returns a constant value indicating unknown scopes.""" + if not scopes: + return UNKNOWN + return ",".join(scopes) + + +def get_conversation_id(activity: Activity) -> str: + """Extracts the conversation ID from the given activity. If the conversation ID cannot be found, returns a constant value indicating unknown conversation ID.""" + return activity.conversation.id if activity.conversation else UNKNOWN + + +def get_delivery_mode(activity: Activity) -> str: + """Extracts the delivery mode from the given activity. If the delivery mode cannot be found, returns a constant value indicating unknown delivery mode.""" + if activity.delivery_mode: + if isinstance(activity.delivery_mode, DeliveryModes): + return activity.delivery_mode.value + else: + return activity.delivery_mode + return UNKNOWN diff --git a/libraries/microsoft-agents-hosting-core/setup.py b/libraries/microsoft-agents-hosting-core/setup.py index b1da90cf..bcec11f3 100644 --- a/libraries/microsoft-agents-hosting-core/setup.py +++ b/libraries/microsoft-agents-hosting-core/setup.py @@ -17,5 +17,7 @@ "isodate>=0.6.1", "azure-core>=1.30.0", "python-dotenv>=1.1.1", + "opentelemetry-api>=1.27.0", + "opentelemetry-sdk>=1.27.0", ], ) diff --git a/test_samples/otel/env.TEMPLATE b/test_samples/otel/env.TEMPLATE new file mode 100644 index 00000000..bf65df42 --- /dev/null +++ b/test_samples/otel/env.TEMPLATE @@ -0,0 +1,16 @@ +CONNECTIONS__SERVICE_CONNECTION__SETTINGS__CLIENTID=client-id +CONNECTIONS__SERVICE_CONNECTION__SETTINGS__CLIENTSECRET=client-secret +CONNECTIONS__SERVICE_CONNECTION__SETTINGS__TENANTID=tenant-id + +AGENTAPPLICATION__USERAUTHORIZATION__HANDLERS__GRAPH__SETTINGS__AZUREBOTOAUTHCONNECTIONNAME=connection-name + +LOGGING__LOGLEVEL__microsoft_agents.hosting.core=INFO + +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 +OTEL_EXPORTER_OTLP_INSECURE=true + +OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST=".*" +OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE=".*" + +OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_REQUEST=".*" +OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_CLIENT_RESPONSE=".*" \ No newline at end of file diff --git a/test_samples/otel/requirements.txt b/test_samples/otel/requirements.txt new file mode 100644 index 00000000..879687ff --- /dev/null +++ b/test_samples/otel/requirements.txt @@ -0,0 +1,14 @@ +python-dotenv +aiohttp +microsoft-agents-hosting-aiohttp +microsoft-agents-hosting-core +microsoft-agents-authentication-msal +microsoft-agents-activity +opentelemetry-instrumentation-aiohttp-server +opentelemetry-instrumentation-aiohttp-client +opentelemetry-instrumentation-requests +opentelemetry-exporter-otlp +opentelemetry-sdk +opentelemetry-api +opentelemetry-instrumentation-logging +opentelemetry-instrumentation \ No newline at end of file diff --git a/test_samples/otel/src/__init__.py b/test_samples/otel/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test_samples/otel/src/agent.py b/test_samples/otel/src/agent.py new file mode 100644 index 00000000..048abf04 --- /dev/null +++ b/test_samples/otel/src/agent.py @@ -0,0 +1,86 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import re +import sys +import traceback +from dotenv import load_dotenv + +from os import environ +from microsoft_agents.hosting.aiohttp import CloudAdapter +from microsoft_agents.hosting.core import ( + Authorization, + AgentApplication, + TurnState, + TurnContext, + MemoryStorage, + MessageFactory, +) +from microsoft_agents.authentication.msal import MsalConnectionManager +from microsoft_agents.activity import load_configuration_from_env + +from .get_user_info import get_user_info +from .card import create_profile_card + +load_dotenv() +agents_sdk_config = load_configuration_from_env(environ) + +STORAGE = MemoryStorage() +CONNECTION_MANAGER = MsalConnectionManager(**agents_sdk_config) +ADAPTER = CloudAdapter(connection_manager=CONNECTION_MANAGER) +AUTHORIZATION = Authorization(STORAGE, CONNECTION_MANAGER, **agents_sdk_config) + + +AGENT_APP = AgentApplication[TurnState]( + storage=STORAGE, adapter=ADAPTER, authorization=AUTHORIZATION, **agents_sdk_config +) + + +@AGENT_APP.conversation_update("membersAdded") +async def on_members_added(context: TurnContext, _state: TurnState): + await context.send_activity( + "Welcome to the empty agent! " + "This agent is designed to be a starting point for your own agent development." + ) + return True + + +@AGENT_APP.message("/logout") +async def logout(context: TurnContext, state: TurnState) -> None: + await AGENT_APP.auth.sign_out(context, "GRAPH") + await context.send_activity(MessageFactory.text("You have been logged out.")) + + +@AGENT_APP.message( + re.compile(r"^/(me|profile)$", re.IGNORECASE), auth_handlers=["GRAPH"] +) +async def profile_request(context: TurnContext, state: TurnState) -> None: + user_token_response = await AGENT_APP.auth.get_token(context, "GRAPH") + if user_token_response and user_token_response is not None: + user_info = await get_user_info(user_token_response.token) + activity = MessageFactory.attachment(create_profile_card(user_info)) + await context.send_activity(activity) + else: + await context.send_activity( + 'Token not available. Enter "login" to sign in.' + ) + +@AGENT_APP.message(re.compile(r"^hello$")) +async def on_hello(context: TurnContext, _state: TurnState): + await context.send_activity("Hello!") + + +@AGENT_APP.activity("message") +async def on_message(context: TurnContext, _state: TurnState): + await context.send_activity(f"you said: {context.activity.text}") + +@AGENT_APP.error +async def on_error(context: TurnContext, error: Exception): + # This check writes out errors to console log .vs. app insights. + # NOTE: In production environment, you should consider logging this to Azure + # application insights. + print(f"\n [on_turn_error] unhandled error: {error}", file=sys.stderr) + traceback.print_exc() + + # Send a message to the user + await context.send_activity("The bot encountered an error or bug.") diff --git a/test_samples/otel/src/card.py b/test_samples/otel/src/card.py new file mode 100644 index 00000000..f2e44a28 --- /dev/null +++ b/test_samples/otel/src/card.py @@ -0,0 +1,73 @@ +from microsoft_agents.hosting.core import CardFactory + +def create_profile_card(profile): + return CardFactory.adaptive_card( + { + "$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "version": "1.5", + "type": "AdaptiveCard", + "body": [ + { + "type": "ColumnSet", + "columns": [ + { + "type": "Column", + "width": "auto", + "items": ( + [ + { + "type": "Image", + "altText": "", + "url": profile.get("imageUri", ""), + "style": "Person", + "size": "Small", + } + ] + if profile.get("imageUri") + else [] + ), + }, + { + "type": "Column", + "width": "auto", + "items": [ + { + "type": "TextBlock", + "weight": "Bolder", + "text": profile["displayName"], + }, + { + "type": "Container", + "spacing": "Small", + "items": [ + { + "type": "TextBlock", + "text": profile["jobTitle"], + "spacing": "Small", + }, + { + "type": "TextBlock", + "text": profile["mail"], + "spacing": "None", + }, + { + "type": "TextBlock", + "text": profile["givenName"], + "spacing": "None", + }, + { + "type": "TextBlock", + "text": profile["surname"], + "spacing": "None", + }, + ], + }, + ], + }, + ], + } + ], + } + ) + + diff --git a/test_samples/otel/src/get_user_info.py b/test_samples/otel/src/get_user_info.py new file mode 100644 index 00000000..4fded9b1 --- /dev/null +++ b/test_samples/otel/src/get_user_info.py @@ -0,0 +1,18 @@ +import aiohttp + +async def get_user_info(token): + """ + Get information about the current user from Microsoft Graph API. + """ + async with aiohttp.ClientSession() as session: + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + } + async with session.get( + "https://graph.microsoft.com/v1.0/me", headers=headers + ) as response: + if response.status == 200: + return await response.json() + error_text = await response.text() + raise Exception(f"Error from Graph API: {response.status} - {error_text}") diff --git a/test_samples/otel/src/main.py b/test_samples/otel/src/main.py new file mode 100644 index 00000000..bfd1ce41 --- /dev/null +++ b/test_samples/otel/src/main.py @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from .telemetry import configure_otel_providers + +configure_otel_providers(service_name="quickstart_agent") + +from .agent import AGENT_APP, CONNECTION_MANAGER +from .start_server import start_server + +start_server( + agent_application=AGENT_APP, + auth_configuration=CONNECTION_MANAGER.get_default_connection_configuration(), +) diff --git a/test_samples/otel/src/start_server.py b/test_samples/otel/src/start_server.py new file mode 100644 index 00000000..df891f52 --- /dev/null +++ b/test_samples/otel/src/start_server.py @@ -0,0 +1,40 @@ +from os import environ +import logging + +from microsoft_agents.hosting.core import AgentApplication, AgentAuthConfiguration +from microsoft_agents.hosting.aiohttp import ( + start_agent_process, + CloudAdapter, + jwt_authorization_middleware, +) +from aiohttp.web import Request, Response, Application, run_app + +logger = logging.getLogger(__name__) + + +def start_server( + agent_application: AgentApplication, auth_configuration: AgentAuthConfiguration +): + async def entry_point(req: Request) -> Response: + + logger.info("Request received at /api/messages endpoint.") + agent: AgentApplication = req.app["agent_app"] + adapter: CloudAdapter = req.app["adapter"] + + return await start_agent_process( + req, + agent, + adapter, + ) + + APP = Application(middlewares=[jwt_authorization_middleware]) + APP.router.add_post("/api/messages", entry_point) + + APP["agent_configuration"] = auth_configuration + APP["agent_app"] = agent_application + APP["adapter"] = agent_application.adapter + + try: + run_app(APP, host="localhost", port=environ.get("PORT", 3978)) + except Exception as error: + raise error diff --git a/test_samples/otel/src/telemetry.py b/test_samples/otel/src/telemetry.py new file mode 100644 index 00000000..ddd81109 --- /dev/null +++ b/test_samples/otel/src/telemetry.py @@ -0,0 +1,107 @@ +import logging +import os +import requests + +import aiohttp +from opentelemetry import metrics, trace +from opentelemetry.trace import Span +from opentelemetry._logs import set_logger_provider +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +from opentelemetry.instrumentation.aiohttp_server import AioHttpServerInstrumentor +from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor +from opentelemetry.instrumentation.requests import RequestsInstrumentor + +def instrument_libraries(): + """Instrument libraries for OpenTelemetry.""" + + # ## + # # instrument aiohttp client + # ## + def aiohttp_client_request_hook( + span: Span, params: aiohttp.TraceRequestStartParams + ): + if span and span.is_recording(): + span.set_attribute("http.url", str(params.url)) + + def aiohttp_client_response_hook( + span: Span, + params: aiohttp.TraceRequestEndParams | aiohttp.TraceRequestExceptionParams, + ): + if span and span.is_recording(): + span.set_attribute("http.url", str(params.url)) + + AioHttpClientInstrumentor().instrument( + request_hook=aiohttp_client_request_hook, + response_hook=aiohttp_client_response_hook, + ) + + # + # instrument requests library + ## + def requests_request_hook(span: Span, request: requests.Request): + if span and span.is_recording(): + span.set_attribute("http.url", request.url) + + def requests_response_hook( + span: Span, request: requests.Request, response: requests.Response + ): + if span and span.is_recording(): + span.set_attribute("http.url", response.url) + + RequestsInstrumentor().instrument( + request_hook=requests_request_hook, response_hook=requests_response_hook + ) + +def configure_otel_providers(service_name: str = "app"): + """Configure OpenTelemetry for FastAPI application.""" + + # Create resource with service name + resource = Resource.create( + { + "service.name": service_name, + "service.version": "1.0.0", + "service.instance.id": os.getenv("HOSTNAME", "unknown"), + "telemetry.sdk.language": "python", + } + ) + + endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317/") + + # Configure Tracing + tracer_provider = TracerProvider(resource=resource) + tracer_provider.add_span_processor( + SimpleSpanProcessor(OTLPSpanExporter(endpoint=endpoint)) + ) + trace.set_tracer_provider(tracer_provider) + + # Configure Metrics + metric_reader = PeriodicExportingMetricReader( + OTLPMetricExporter(endpoint=endpoint) + ) + meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader]) + metrics.set_meter_provider(meter_provider) + + # Configure Logging + logger_provider = LoggerProvider(resource=resource) + logger_provider.add_log_record_processor( + BatchLogRecordProcessor(OTLPLogExporter(endpoint=endpoint)) + ) + set_logger_provider(logger_provider) + + # Add logging handler + handler = LoggingHandler(level=logging.NOTSET, logger_provider=logger_provider) + logging.getLogger().addHandler(handler) + + logging.getLogger().info("OpenTelemetry providers configured with endpoint: %s", endpoint) + + instrument_libraries() \ No newline at end of file diff --git a/test_samples/otel/start_dashboard.ps1 b/test_samples/otel/start_dashboard.ps1 new file mode 100644 index 00000000..de2dd386 --- /dev/null +++ b/test_samples/otel/start_dashboard.ps1 @@ -0,0 +1 @@ +docker run --rm -it -p 18888:18888 -p 4317:18889 --name aspire-dashboard mcr.microsoft.com/dotnet/aspire-dashboard:latest \ No newline at end of file diff --git a/tests/_common/_tests/test_delta_metric_reader.py b/tests/_common/_tests/test_delta_metric_reader.py new file mode 100644 index 00000000..67be5eed --- /dev/null +++ b/tests/_common/_tests/test_delta_metric_reader.py @@ -0,0 +1,212 @@ +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import InMemoryMetricReader + +from tests._common.fixtures.telemetry import DeltaMetricReader +from tests._common.telemetry_utils import find_metric, sum_counter, sum_hist_count + + +def _make_reader(): + """Create a standalone MeterProvider + InMemoryMetricReader pair.""" + inner = InMemoryMetricReader() + provider = MeterProvider([inner]) + meter = provider.get_meter("test") + return inner, provider, meter + + +# ---- basic counter delta ---- + + +def test_counter_delta_excludes_baseline(): + inner, provider, meter = _make_reader() + counter = meter.create_counter("my_counter") + + counter.add(5) + delta = DeltaMetricReader(inner) # baseline captures 5 + + counter.add(3) + data = delta.get_metrics_data() + + assert sum_counter(find_metric(data, "my_counter")) == 3 + provider.shutdown() + + +def test_counter_delta_is_zero_when_nothing_new(): + inner, provider, meter = _make_reader() + counter = meter.create_counter("my_counter") + + counter.add(10) + delta = DeltaMetricReader(inner) + + data = delta.get_metrics_data() + metric = find_metric(data, "my_counter") + # No new increments → metric either absent or zero + assert metric is None or sum_counter(metric) == 0 + provider.shutdown() + + +def test_counter_accumulates_across_calls(): + inner, provider, meter = _make_reader() + counter = meter.create_counter("my_counter") + + delta = DeltaMetricReader(inner) + + counter.add(2) + counter.add(3) + + data = delta.get_metrics_data() + assert sum_counter(find_metric(data, "my_counter")) == 5 + provider.shutdown() + + +# ---- reset ---- + + +def test_reset_clears_delta(): + inner, provider, meter = _make_reader() + counter = meter.create_counter("my_counter") + + delta = DeltaMetricReader(inner) + counter.add(7) + + delta.reset() # new baseline includes the 7 + + data = delta.get_metrics_data() + metric = find_metric(data, "my_counter") + assert metric is None or sum_counter(metric) == 0 + + counter.add(2) + data = delta.get_metrics_data() + assert sum_counter(find_metric(data, "my_counter")) == 2 + provider.shutdown() + + +# ---- histogram delta ---- + + +def test_histogram_delta_excludes_baseline(): + inner, provider, meter = _make_reader() + hist = meter.create_histogram("my_hist") + + hist.record(100) + hist.record(200) + delta = DeltaMetricReader(inner) # baseline count=2 + + hist.record(50) + data = delta.get_metrics_data() + + assert sum_hist_count(find_metric(data, "my_hist")) == 1 + provider.shutdown() + + +def test_histogram_delta_is_zero_when_nothing_new(): + inner, provider, meter = _make_reader() + hist = meter.create_histogram("my_hist") + + hist.record(42) + delta = DeltaMetricReader(inner) + + data = delta.get_metrics_data() + metric = find_metric(data, "my_hist") + assert metric is None or sum_hist_count(metric) == 0 + provider.shutdown() + + +# ---- attribute-keyed counters ---- + + +def test_counter_delta_respects_attributes(): + inner, provider, meter = _make_reader() + counter = meter.create_counter("tagged") + + counter.add(10, attributes={"ch": "teams"}) + counter.add(20, attributes={"ch": "webchat"}) + + delta = DeltaMetricReader(inner) + + counter.add(1, attributes={"ch": "teams"}) + counter.add(2, attributes={"ch": "webchat"}) + + data = delta.get_metrics_data() + metric = find_metric(data, "tagged") + + assert sum_counter(metric, {"ch": "teams"}) == 1 + assert sum_counter(metric, {"ch": "webchat"}) == 2 + provider.shutdown() + + +# ---- multiple metrics ---- + + +def test_multiple_metrics_tracked_independently(): + inner, provider, meter = _make_reader() + c1 = meter.create_counter("counter_a") + c2 = meter.create_counter("counter_b") + + c1.add(100) + delta = DeltaMetricReader(inner) + + c1.add(1) + c2.add(5) + + data = delta.get_metrics_data() + assert sum_counter(find_metric(data, "counter_a")) == 1 + assert sum_counter(find_metric(data, "counter_b")) == 5 + provider.shutdown() + + +# ---- new metric after baseline ---- + + +def test_new_metric_after_baseline(): + inner, provider, meter = _make_reader() + + delta = DeltaMetricReader(inner) + + counter = meter.create_counter("late_counter") + counter.add(3) + + data = delta.get_metrics_data() + assert sum_counter(find_metric(data, "late_counter")) == 3 + provider.shutdown() + + +# ---- force_flush delegates ---- + + +def test_force_flush_delegates(): + inner, provider, meter = _make_reader() + delta = DeltaMetricReader(inner) + + counter = meter.create_counter("flushed") + counter.add(1) + delta.force_flush() + + data = delta.get_metrics_data() + assert sum_counter(find_metric(data, "flushed")) == 1 + provider.shutdown() + + +# ---- output structure is compatible with find_metric ---- + + +def test_output_structure_compatible_with_helpers(): + inner, provider, meter = _make_reader() + delta = DeltaMetricReader(inner) + + counter = meter.create_counter("compat") + counter.add(1) + + data = delta.get_metrics_data() + + assert hasattr(data, "resource_metrics") + rm = data.resource_metrics[0] + assert hasattr(rm, "scope_metrics") + sm = rm.scope_metrics[0] + assert hasattr(sm, "metrics") + m = sm.metrics[0] + assert m.name == "compat" + assert hasattr(m.data, "data_points") + dp = m.data.data_points[0] + assert hasattr(dp, "value") + assert hasattr(dp, "attributes") + provider.shutdown() diff --git a/tests/_common/fixtures/telemetry.py b/tests/_common/fixtures/telemetry.py new file mode 100644 index 00000000..5de63784 --- /dev/null +++ b/tests/_common/fixtures/telemetry.py @@ -0,0 +1,169 @@ +import pytest +from types import SimpleNamespace + +from opentelemetry import trace, metrics +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import InMemoryMetricReader + + +class DeltaMetricReader: + """Wraps an InMemoryMetricReader so each test only sees metrics + accrued *after* the wrapper was created (or last reset). + + InMemoryMetricReader uses cumulative aggregation by default and has + no ``clear()`` method, so counters and histograms accumulate across + the whole session. This wrapper snapshots the cumulative values at + construction time and subtracts them from every subsequent + ``get_metrics_data()`` call, producing a delta view that is + compatible with the ``find_metric`` / ``sum_counter`` / + ``sum_hist_count`` helpers. + """ + + def __init__(self, inner: InMemoryMetricReader): + self._inner = inner + self._baseline: dict[tuple, tuple] = {} + self.reset() + + def reset(self): + """Capture the current cumulative values as the new zero-line.""" + data = self._inner.get_metrics_data() + self._baseline = self._snapshot(data) + + def force_flush(self): + self._inner.force_flush() + + def get_metrics_data(self): + """Return a metrics-data object containing only the delta + since the last ``reset()``.""" + raw = self._inner.get_metrics_data() + return self._subtract(raw, self._baseline) + + # -- internals -------------------------------------------------- + + @staticmethod + def _dp_key(metric_name, dp): + attrs = dp.attributes or {} + return (metric_name, tuple(sorted(attrs.items()))) + + @staticmethod + def _snapshot(data): + snap: dict[tuple, tuple] = {} + if data is None: + return snap + for rm in data.resource_metrics: + for sm in rm.scope_metrics: + for m in sm.metrics: + for dp in m.data.data_points: + k = DeltaMetricReader._dp_key(m.name, dp) + if hasattr(dp, "bucket_counts"): + snap[k] = ("hist", dp.count) + else: + snap[k] = ("counter", dp.value) + return snap + + @staticmethod + def _empty_data(): + return SimpleNamespace( + resource_metrics=[ + SimpleNamespace(scope_metrics=[SimpleNamespace(metrics=[])]) + ] + ) + + @staticmethod + def _subtract(data, baseline): + if data is None: + return DeltaMetricReader._empty_data() + all_metrics: list = [] + for rm in data.resource_metrics: + for sm in rm.scope_metrics: + for m in sm.metrics: + points: list = [] + for dp in m.data.data_points: + k = DeltaMetricReader._dp_key(m.name, dp) + base = baseline.get(k) + if hasattr(dp, "bucket_counts"): + base_count = base[1] if base else 0 + points.append( + SimpleNamespace( + attributes=dp.attributes, + count=dp.count - base_count, + ) + ) + else: + base_val = base[1] if base else 0 + points.append( + SimpleNamespace( + attributes=dp.attributes, + value=dp.value - base_val, + ) + ) + if points: + all_metrics.append( + SimpleNamespace( + name=m.name, + data=SimpleNamespace(data_points=points), + ) + ) + return SimpleNamespace( + resource_metrics=[ + SimpleNamespace(scope_metrics=[SimpleNamespace(metrics=all_metrics)]) + ] + ) + + +_metric_reader = None +_exporter = None + + +@pytest.fixture(scope="session") +def test_telemetry(): + """Set up fresh in-memory exporter for testing.""" + global _exporter, _metric_reader + + if _exporter is None: + exporter = InMemorySpanExporter() + metric_reader = InMemoryMetricReader() + + tracer_provider = TracerProvider() + tracer_provider.add_span_processor(SimpleSpanProcessor(exporter)) + trace.set_tracer_provider(tracer_provider) + + meter_provider = MeterProvider([metric_reader]) + + metrics.set_meter_provider(meter_provider) + + _exporter = exporter + _metric_reader = metric_reader + else: + meter_provider = metrics.get_meter_provider() + tracer_provider = trace.get_tracer_provider() + + exporter = _exporter + metric_reader = _metric_reader + + yield _exporter, metric_reader + + exporter.clear() + meter_provider.force_flush() + # tracer_provider.shutdown() + # meter_provider.shutdown() + + +@pytest.fixture(scope="function") +def test_exporter(test_telemetry): + """Provide the in-memory span exporter for each test.""" + exporter, _ = test_telemetry + exporter.clear() + return exporter + + +@pytest.fixture(scope="function") +def test_metric_reader(test_telemetry): + """Provide a delta view of the metric reader for each test. + Only metrics recorded *during* the test are visible.""" + _, metric_reader = test_telemetry + metric_reader.force_flush() + return DeltaMetricReader(metric_reader) diff --git a/tests/_common/telemetry_utils.py b/tests/_common/telemetry_utils.py new file mode 100644 index 00000000..3acb4008 --- /dev/null +++ b/tests/_common/telemetry_utils.py @@ -0,0 +1,38 @@ +def find_metric(metrics_data, metric_name): + """Helper function to find a metric by name in the collected metrics data. + + Usage: + metric = find_metric(metrics_data, "my_metric_name") + """ + for resource_metric in metrics_data.resource_metrics: + for scope_metric in resource_metric.scope_metrics: + for metric in scope_metric.metrics: + if metric.name == metric_name: + return metric + return None + + +def sum_counter(metric, attribute_filter=None): + if metric is None: + return 0 + total = 0 + for point in metric.data.data_points: + if attribute_filter is None or all( + point.attributes.get(key) == value + for key, value in attribute_filter.items() + ): + total += point.value + return total + + +def sum_hist_count(metric, attribute_filter=None): + if metric is None: + return 0 + total = 0 + for point in metric.data.data_points: + if attribute_filter is None or all( + point.attributes.get(key) == value + for key, value in attribute_filter.items() + ): + total += point.count + return total diff --git a/tests/hosting_core/telemetry/__init__.py b/tests/hosting_core/telemetry/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/hosting_core/telemetry/test_agents_telemetry.py b/tests/hosting_core/telemetry/test_agents_telemetry.py new file mode 100644 index 00000000..954daa9a --- /dev/null +++ b/tests/hosting_core/telemetry/test_agents_telemetry.py @@ -0,0 +1,109 @@ +from opentelemetry import trace + +from tests._common.fixtures.telemetry import ( # unused imports are needed for fixtures + test_telemetry, + test_exporter, + test_metric_reader, +) + +from tests._common.telemetry_utils import find_metric, sum_counter + +from microsoft_agents.hosting.core.telemetry import ( + agents_telemetry, + SERVICE_NAME, + SERVICE_VERSION, +) + + +def test_tracer(test_exporter): + """Test that the tracer is initialized with the correct service name and version.""" + + with agents_telemetry.tracer.start_as_current_span("test_span"): + pass + + spans = test_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "test_span" + assert spans[0].instrumentation_scope.name == SERVICE_NAME + assert spans[0].instrumentation_scope.version == SERVICE_VERSION + + +def test_meter(test_metric_reader): + """Test that the meter is initialized with the correct service name and version.""" + counter = agents_telemetry.meter.create_counter("test_counter") + counter.add(1) + + metrics_data = test_metric_reader.get_metrics_data() + metric = find_metric(metrics_data, "test_counter") + assert len(metric.data.data_points) == 1 + assert sum_counter(metric) == 1 + assert metric.name == "test_counter" + + +def test_start_as_current_span(test_exporter): + """Test start_as_current_span creates a span with context attributes.""" + + with agents_telemetry.start_as_current_span("test_span"): + pass + + spans = test_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "test_span" + assert spans[0].instrumentation_scope.name == SERVICE_NAME + assert spans[0].instrumentation_scope.version == SERVICE_VERSION + + +def test_start_as_current_span_with_callback(mocker, test_exporter): + """Test start_as_current_span records success status and callback payload.""" + callback = mocker.Mock() + + with agents_telemetry.start_as_current_span( + "test_span", + callback=callback, + ): + pass + + finished_spans = test_exporter.get_finished_spans() + assert len(finished_spans) == 1 + + finished_span = finished_spans[0] + assert finished_span.name == "test_span" + assert finished_span.status.status_code == trace.StatusCode.OK + + callback.assert_called_once() + callback_span, duration_ms, callback_exception = callback.call_args.args + assert callback_span.name == "test_span" + assert duration_ms >= 0 + assert callback_exception is None + + +def test_start_as_current_span_with_callback_with_failure(mocker, test_exporter): + """Test start_as_current_span records failure status and callback payload.""" + callback = mocker.Mock() + + exception_raised = False + try: + with agents_telemetry.start_as_current_span( + "test_span", + callback=callback, + ): + raise ValueError("Test exception") + except ValueError as ex: + exception_raised = True + assert str(ex) == "Test exception" + + assert exception_raised + + finished_spans = test_exporter.get_finished_spans() + assert len(finished_spans) == 1 + + finished_span = finished_spans[0] + assert finished_span.name == "test_span" + assert finished_span.status.status_code == trace.StatusCode.ERROR + + callback.assert_called_once() + callback_span, duration_ms, callback_exception = callback.call_args.args + assert callback_span.name == "test_span" + assert duration_ms >= 0 + assert callback_exception is not None + assert str(callback_exception) == "Test exception" diff --git a/tests/hosting_core/telemetry/test_simple_span_wrapper.py b/tests/hosting_core/telemetry/test_simple_span_wrapper.py new file mode 100644 index 00000000..e0e44c5b --- /dev/null +++ b/tests/hosting_core/telemetry/test_simple_span_wrapper.py @@ -0,0 +1,252 @@ +import time + +import pytest +from types import SimpleNamespace + +from opentelemetry.trace import StatusCode + +from tests._common.fixtures.telemetry import ( # unused imports are needed for fixtures + test_telemetry, + test_exporter, + test_metric_reader, +) +from tests._common.telemetry_utils import ( + find_metric, + sum_counter, + sum_hist_count, +) + +from microsoft_agents.hosting.core import TurnContext +from microsoft_agents.hosting.core.telemetry import ( + agents_telemetry, + SimpleSpanWrapper, +) + + +class MySpanWrapper(SimpleSpanWrapper): + """Subclass with custom attributes and a callback that records info on the span.""" + + def __init__(self, span_name): + super().__init__(span_name) + + def _callback(self, span, duration_ms, exception): + span.set_attribute("callback_called", True) + span.set_attribute("duration_ms", duration_ms) + if exception: + span.set_attribute("exception_message", str(exception)) + + def _get_attributes(self): + return {"custom_attribute": "custom_value"} + + +class MinimalSpanWrapper(SimpleSpanWrapper): + """Subclass that uses default (no-op) _callback and empty _get_attributes.""" + + def __init__(self, span_name): + super().__init__(span_name) + + +class TestSimpleSpanWrapper: + def test_simple_span_wrapper(self, test_exporter): + """Test that MySpanWrapper creates a span with the correct attributes and callback.""" + with MySpanWrapper("test_simple_span"): + pass + + spans = test_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + assert span.name == "test_simple_span" + assert span.attributes["custom_attribute"] == "custom_value" + assert span.attributes["callback_called"] is True + assert span.attributes["duration_ms"] >= 0 + + def test_minimal_span_wrapper_creates_span(self, test_exporter): + """A subclass with no overrides still creates a valid span.""" + with MinimalSpanWrapper("minimal_span"): + pass + + spans = test_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "minimal_span" + + def test_minimal_span_no_custom_attributes(self, test_exporter): + """Default _get_attributes returns empty dict, so no custom attributes are set.""" + with MinimalSpanWrapper("no_attrs"): + pass + + span = test_exporter.get_finished_spans()[0] + # The span should not have the custom_attribute key + assert "custom_attribute" not in (span.attributes or {}) + + def test_span_status_ok_on_success(self, test_exporter): + """Span status is OK when the body completes without error.""" + with MySpanWrapper("ok_span"): + pass + + span = test_exporter.get_finished_spans()[0] + assert span.status.status_code == StatusCode.OK + + def test_span_status_error_on_exception(self, test_exporter): + """Span status is ERROR and exception is re-raised when body raises.""" + with pytest.raises(ValueError, match="boom"): + with MySpanWrapper("err_span"): + raise ValueError("boom") + + span = test_exporter.get_finished_spans()[0] + assert span.status.status_code == StatusCode.ERROR + + def test_callback_receives_exception(self, test_exporter): + """The callback receives the exception object when the body raises.""" + with pytest.raises(RuntimeError): + with MySpanWrapper("cb_err"): + raise RuntimeError("fail") + + span = test_exporter.get_finished_spans()[0] + assert span.attributes["callback_called"] is True + assert span.attributes["exception_message"] == "fail" + + def test_exception_is_recorded_on_span(self, test_exporter): + """record_exception is called, so the span events contain the exception.""" + with pytest.raises(TypeError): + with MySpanWrapper("rec_exc"): + raise TypeError("type error") + + span = test_exporter.get_finished_spans()[0] + exception_events = [e for e in span.events if e.name == "exception"] + assert len(exception_events) == 1 + assert "type error" in exception_events[0].attributes["exception.message"] + + def test_span_completion_event_on_success(self, test_exporter): + """A completion event is added on successful span execution.""" + with MinimalSpanWrapper("evt_span"): + pass + + span = test_exporter.get_finished_spans()[0] + completion_events = [e for e in span.events if "completed" in e.name] + assert len(completion_events) == 1 + assert completion_events[0].attributes["duration_ms"] >= 0 + + def test_no_completion_event_on_failure(self, test_exporter): + """No completion event is added when the span body raises.""" + with pytest.raises(Exception): + with MinimalSpanWrapper("no_evt"): + raise Exception("oops") + + span = test_exporter.get_finished_spans()[0] + completion_events = [e for e in span.events if "completed" in e.name] + assert len(completion_events) == 0 + + def test_duration_is_positive(self, test_exporter): + """The callback's duration_ms reflects actual elapsed time.""" + with MySpanWrapper("dur_span"): + time.sleep(0.05) + + span = test_exporter.get_finished_spans()[0] + assert span.attributes["duration_ms"] >= 40 # at least ~40ms + + def test_active_property_inside_context(self, test_exporter): + """The active property is True while the context manager is open.""" + wrapper = MySpanWrapper("active_test") + assert wrapper.active is False + + with wrapper: + assert wrapper.active is True + + assert wrapper.active is False + + def test_otel_span_accessible_inside_context(self, test_exporter): + """otel_span returns the underlying span while active.""" + wrapper = MinimalSpanWrapper("otel_access") + with wrapper: + otel_span = wrapper.otel_span + assert otel_span is not None + + def test_otel_span_raises_when_not_started(self): + """Accessing otel_span before start raises RuntimeError.""" + wrapper = MinimalSpanWrapper("not_started") + assert wrapper.otel_span is None + + def test_start_end_manual_lifecycle(self, test_exporter): + """start() and end() can be used instead of the context manager.""" + wrapper = MySpanWrapper("manual_lifecycle") + wrapper.start() + assert wrapper.active is True + wrapper.end() + assert wrapper.active is False + + spans = test_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "manual_lifecycle" + + def test_multiple_sequential_spans(self, test_exporter): + """Multiple span wrappers used sequentially each create their own span.""" + with MySpanWrapper("seq_1"): + pass + with MySpanWrapper("seq_2"): + pass + with MinimalSpanWrapper("seq_3"): + pass + + spans = test_exporter.get_finished_spans() + assert len(spans) == 3 + names = [s.name for s in spans] + assert "seq_1" in names + assert "seq_2" in names + assert "seq_3" in names + + def test_nested_span_wrappers(self, test_exporter): + """Nested span wrappers create parent-child span relationships.""" + with MySpanWrapper("parent"): + with MinimalSpanWrapper("child"): + pass + + spans = test_exporter.get_finished_spans() + assert len(spans) == 2 + child_span = next(s for s in spans if s.name == "child") + parent_span = next(s for s in spans if s.name == "parent") + assert child_span.parent.span_id == parent_span.context.span_id + + def test_wrapper_reuse_after_end(self, test_exporter): + """A wrapper can be reused after it has been ended.""" + wrapper = MySpanWrapper("reuse") + + with wrapper: + pass + assert wrapper.active is False + + # Re-enter + with wrapper: + assert wrapper.active is True + assert wrapper.active is False + + spans = test_exporter.get_finished_spans() + assert len(spans) == 2 + assert all(s.name == "reuse" for s in spans) + + def test_custom_attributes_set_on_span(self, test_exporter): + """Custom attributes from _get_attributes appear on the finished span.""" + + class MultiAttrWrapper(SimpleSpanWrapper): + def __init__(self): + super().__init__("multi_attr") + + def _get_attributes(self): + return {"key_a": "val_a", "key_b": 42, "key_c": True} + + with MultiAttrWrapper(): + pass + + span = test_exporter.get_finished_spans()[0] + assert span.attributes["key_a"] == "val_a" + assert span.attributes["key_b"] == 42 + assert span.attributes["key_c"] is True + + def test_exception_propagates_unchanged(self, test_exporter): + """The original exception type and message are preserved after re-raise.""" + + class CustomError(Exception): + pass + + with pytest.raises(CustomError, match="custom msg"): + with MinimalSpanWrapper("propagate"): + raise CustomError("custom msg") diff --git a/tests/hosting_core/telemetry/test_utils.py b/tests/hosting_core/telemetry/test_utils.py new file mode 100644 index 00000000..8f4bd044 --- /dev/null +++ b/tests/hosting_core/telemetry/test_utils.py @@ -0,0 +1,72 @@ +from microsoft_agents.activity import ( + Activity, + ConversationAccount, + DeliveryModes, +) +from microsoft_agents.hosting.core.telemetry.attributes import UNKNOWN +from microsoft_agents.hosting.core.telemetry.utils import ( + format_scopes, + get_conversation_id, + get_delivery_mode, +) + +# ---- format_scopes ---- + + +def test_format_scopes_single(): + assert format_scopes(["User.Read"]) == "User.Read" + + +def test_format_scopes_multiple(): + assert format_scopes(["User.Read", "Mail.Read"]) == "User.Read,Mail.Read" + + +def test_format_scopes_none(): + assert format_scopes(None) == UNKNOWN + + +def test_format_scopes_empty_list(): + assert format_scopes([]) == UNKNOWN + + +# ---- get_conversation_id ---- + + +def test_get_conversation_id_present(): + activity = Activity( + type="message", + conversation=ConversationAccount(id="conv-123"), + ) + assert get_conversation_id(activity) == "conv-123" + + +def test_get_conversation_id_no_conversation(): + activity = Activity(type="message") + assert get_conversation_id(activity) == UNKNOWN + + +# ---- get_delivery_mode ---- + + +def test_get_delivery_mode_enum(): + activity = Activity( + type="message", + delivery_mode=DeliveryModes.expect_replies, + ) + assert get_delivery_mode(activity) == "expectReplies" + + +def test_get_delivery_mode_string(): + activity = Activity(type="message", delivery_mode="custom_mode") + assert get_delivery_mode(activity) == "custom_mode" + + +def test_get_delivery_mode_none(): + activity = Activity(type="message") + assert get_delivery_mode(activity) == UNKNOWN + + +def test_get_delivery_mode_all_enum_values(): + for mode in DeliveryModes: + activity = Activity(type="message", delivery_mode=mode) + assert get_delivery_mode(activity) == mode.value From c92c4bd7829aa3a4bb46aa919a0b51ba70264208 Mon Sep 17 00:00:00 2001 From: Rodrigo Brandao Date: Thu, 26 Mar 2026 11:34:11 -0700 Subject: [PATCH 2/3] Removed unused file --- .../activity/otel/__init__.py | 30 ------------------- 1 file changed, 30 deletions(-) delete mode 100644 libraries/microsoft-agents-activity/microsoft_agents/activity/otel/__init__.py diff --git a/libraries/microsoft-agents-activity/microsoft_agents/activity/otel/__init__.py b/libraries/microsoft-agents-activity/microsoft_agents/activity/otel/__init__.py deleted file mode 100644 index 329e7b13..00000000 --- a/libraries/microsoft-agents-activity/microsoft_agents/activity/otel/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -from __future__ import annotations - -import functools -import logging -import types -from typing import Any, Collection, Literal - - -class AgentMetrics: - - tracer: Tracer - - -class AgentsSDKInstrumentor(BaseInstrumentor): - """An OpenTelemetry Instrumentor for the Microsoft Agents SDK.""" - - def _instrument(self, **kwargs: Any) -> None: - """Instruments the Microsoft Agents SDK.""" - try: - import microsoft_agents.activity.otel._agent_activity_tracing - except ImportError as exc: - logging.warning( - "Failed to import Microsoft Agents SDK instrumentation module: %s", - exc, - ) - - def _uninstrument(self, **kwargs: Any) -> None: - """Uninstruments the Microsoft Agents SDK.""" - # No uninstrumentation logic needed for this instrumentation - pass From 7d1a2c1f8d70e46e6b2ab9acd930d7939259954b Mon Sep 17 00:00:00 2001 From: Rodrigo Brandao Date: Thu, 26 Mar 2026 11:37:41 -0700 Subject: [PATCH 3/3] Undid weird formatting --- .../hosting/core/app/agent_application.py | 38 +++++++------------ 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/app/agent_application.py b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/app/agent_application.py index 60875a0e..bbc48ba4 100644 --- a/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/app/agent_application.py +++ b/libraries/microsoft-agents-hosting-core/microsoft_agents/hosting/core/app/agent_application.py @@ -120,11 +120,9 @@ def __init__( "ApplicationOptions.storage is required and was not configured.", stack_info=True, ) - raise ApplicationError( - """ + raise ApplicationError(""" The `ApplicationOptions.storage` property is required and was not configured. - """ - ) + """) if options.long_running_messages and ( not options.adapter or not options.bot_app_id @@ -133,12 +131,10 @@ def __init__( "ApplicationOptions.long_running_messages requires an adapter and bot_app_id.", stack_info=True, ) - raise ApplicationError( - """ + raise ApplicationError(""" The `ApplicationOptions.long_running_messages` property is unavailable because no adapter or `bot_app_id` was configured. - """ - ) + """) if options.adapter: self._adapter = options.adapter @@ -180,12 +176,10 @@ def adapter(self) -> ChannelServiceAdapter: "AgentApplication.adapter(): self._adapter is not configured.", stack_info=True, ) - raise ApplicationError( - """ + raise ApplicationError(""" The AgentApplication.adapter property is unavailable because it was not configured when creating the AgentApplication. - """ - ) + """) return self._adapter @@ -203,12 +197,10 @@ def auth(self) -> Authorization: "AgentApplication.auth(): self._auth is not configured.", stack_info=True, ) - raise ApplicationError( - """ + raise ApplicationError(""" The `AgentApplication.auth` property is unavailable because no Auth options were configured. - """ - ) + """) return self._auth @@ -592,12 +584,10 @@ async def sign_in_success(context: TurnContext, state: TurnState, connection_id: f"Failed to register sign-in success handler for route handler {func.__name__}", stack_info=True, ) - raise ApplicationError( - """ + raise ApplicationError(""" The `AgentApplication.on_sign_in_success` method is unavailable because no Auth options were configured. - """ - ) + """) return func def on_sign_in_failure( @@ -628,12 +618,10 @@ async def sign_in_failure(context: TurnContext, state: TurnState, connection_id: f"Failed to register sign-in failure handler for route handler {func.__name__}", stack_info=True, ) - raise ApplicationError( - """ + raise ApplicationError(""" The `AgentApplication.on_sign_in_failure` method is unavailable because no Auth options were configured. - """ - ) + """) return func def error( @@ -876,4 +864,4 @@ async def _on_error(self, context: TurnContext, err: ApplicationError) -> None: exc_info=True, ) logger.error(err) - raise err + raise err \ No newline at end of file