diff --git a/sdk/ai/azure-ai-projects/assets.json b/sdk/ai/azure-ai-projects/assets.json index 59d949fbd123..31a4dbf80c15 100644 --- a/sdk/ai/azure-ai-projects/assets.json +++ b/sdk/ai/azure-ai-projects/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/ai/azure-ai-projects", - "Tag": "python/ai/azure-ai-projects_b88e0edce8" + "Tag": "python/ai/azure-ai-projects_8a24536cc3" } diff --git a/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets.py b/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets.py index a2c2c30c9e0b..6e538f2b886a 100644 --- a/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets.py +++ b/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets.py @@ -20,7 +20,7 @@ Set these environment variables with your own values: 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your Microsoft Foundry project. - 2) CONNECTION_NAME - Required. The name of the Azure Storage Account connection to use for uploading files. + 2) CONNECTION_NAME - Optional. The name of the Azure Storage Account connection to use for uploading files. 3) DATASET_NAME - Optional. The name of the Dataset to create and use in this sample. 4) DATASET_VERSION_1 - Optional. The first version of the Dataset to create and use in this sample. 5) DATASET_VERSION_2 - Optional. The second version of the Dataset to create and use in this sample. @@ -32,12 +32,12 @@ from dotenv import load_dotenv from azure.identity import DefaultAzureCredential from azure.ai.projects import AIProjectClient -from azure.ai.projects.models import DatasetVersion +from azure.ai.projects.models import DatasetVersion, ConnectionType load_dotenv() endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] -connection_name = os.environ["CONNECTION_NAME"] +connection_name = os.environ.get("CONNECTION_NAME") dataset_name = os.environ.get("DATASET_NAME", "dataset-test") dataset_version_1 = os.environ.get("DATASET_VERSION_1", "1.0") dataset_version_2 = os.environ.get("DATASET_VERSION_2", "2.0") @@ -52,6 +52,16 @@ AIProjectClient(endpoint=endpoint, credential=credential) as project_client, ): + if not connection_name: + try: + connection_name = project_client.connections.get_default(ConnectionType.AZURE_STORAGE_ACCOUNT).name + except Exception as e: + raise ValueError( + "Unable to resolve a default Azure Storage connection from this project endpoint. " + "Use a project endpoint configured with an Azure Storage account connection, " + "or set CONNECTION_NAME explicitly." + ) from e + # [START datasets_sample] print( f"Upload a single file and create a new Dataset `{dataset_name}`, version `{dataset_version_1}`, to reference the file." diff --git a/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets_async.py b/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets_async.py index d159fea9e2af..e78c4f1c1990 100644 --- a/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets_async.py +++ b/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets_async.py @@ -20,7 +20,7 @@ Set these environment variables with your own values: 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your Microsoft Foundry project. - 2) CONNECTION_NAME - Required. The name of the Azure Storage Account connection to use for uploading files. + 2) CONNECTION_NAME - Optional. The name of the Azure Storage Account connection to use for uploading files. 3) DATASET_NAME - Optional. The name of the Dataset to create and use in this sample. 4) DATASET_VERSION_1 - Optional. The first version of the Dataset to create and use in this sample. 5) DATASET_VERSION_2 - Optional. The second version of the Dataset to create and use in this sample. @@ -33,7 +33,7 @@ from dotenv import load_dotenv from azure.identity.aio import DefaultAzureCredential from azure.ai.projects.aio import AIProjectClient -from azure.ai.projects.models import DatasetVersion +from azure.ai.projects.models import DatasetVersion, ConnectionType load_dotenv() @@ -46,7 +46,7 @@ async def main() -> None: endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] - connection_name = os.environ["CONNECTION_NAME"] + connection_name = os.environ.get("CONNECTION_NAME") dataset_name = os.environ.get("DATASET_NAME", "dataset-test") dataset_version_1 = os.environ.get("DATASET_VERSION_1", "1.0") dataset_version_2 = os.environ.get("DATASET_VERSION_2", "2.0") @@ -56,6 +56,18 @@ async def main() -> None: AIProjectClient(endpoint=endpoint, credential=credential) as project_client, ): + if not connection_name: + try: + connection_name = ( + await project_client.connections.get_default(ConnectionType.AZURE_STORAGE_ACCOUNT) + ).name + except Exception as e: + raise ValueError( + "Unable to resolve a default Azure Storage connection from this project endpoint. " + "Use a project endpoint configured with an Azure Storage account connection, " + "or set CONNECTION_NAME explicitly." + ) from e + print( f"Upload a single file and create a new Dataset `{dataset_name}`, version `{dataset_version_1}`, to reference the file." ) diff --git a/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets_download.py b/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets_download.py index 2600f864c365..eb1332715f8a 100644 --- a/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets_download.py +++ b/sdk/ai/azure-ai-projects/samples/datasets/sample_datasets_download.py @@ -21,7 +21,7 @@ Set these environment variables with your own values: 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your Microsoft Foundry project. - 2) CONNECTION_NAME - Required. The name of the Azure Storage Account connection to use for uploading files. + 2) CONNECTION_NAME - Optional. The name of the Azure Storage Account connection to use for uploading files. 3) DATASET_NAME - Optional. The name of the Dataset to create and use in this sample. 4) DATASET_VERSION - Optional. The version of the Dataset to create and use in this sample. 6) DATA_FOLDER - Optional. The folder path where the data files for upload are located. @@ -30,18 +30,20 @@ import os import re +import tempfile from dotenv import load_dotenv from azure.identity import DefaultAzureCredential from azure.ai.projects import AIProjectClient +from azure.ai.projects.models import ConnectionType from azure.storage.blob import ContainerClient load_dotenv() endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"] -connection_name = os.environ["CONNECTION_NAME"] +connection_name = os.environ.get("CONNECTION_NAME") dataset_name = os.environ.get("DATASET_NAME", "dataset-test") dataset_version = os.environ.get("DATASET_VERSION", "1.0") -download_folder = os.environ.get("DOWNLOAD_FOLDER", "downloaded_blobs") +download_folder = os.environ.get("DOWNLOAD_FOLDER", os.path.join(tempfile.gettempdir(), "downloaded_blobs")) # Construct the paths to the data folder and data file used in this sample script_dir = os.path.dirname(os.path.abspath(__file__)) @@ -52,6 +54,10 @@ AIProjectClient(endpoint=endpoint, credential=credential) as project_client, ): + connection_name = ( + connection_name or project_client.connections.get_default(ConnectionType.AZURE_STORAGE_ACCOUNT).name + ) + print( f"Upload files in a folder (including sub-folders) and create a dataset named `{dataset_name}` version `{dataset_version}`, to reference the files." ) @@ -85,7 +91,9 @@ with open(file_path, "wb") as f: f.write(blob_client.download_blob().readall()) - print(f"Downloaded: {blob_name}") + print(f"Downloaded: {blob_name} -> {file_path}") + + print(f"All files were downloaded to: {os.path.abspath(download_folder)}") print("Delete the dataset created above:") project_client.datasets.delete(name=dataset_name, version=dataset_version) diff --git a/sdk/ai/azure-ai-projects/tests/conftest.py b/sdk/ai/azure-ai-projects/tests/conftest.py index 56e8d4b9cdf1..9718b2b2f20d 100644 --- a/sdk/ai/azure-ai-projects/tests/conftest.py +++ b/sdk/ai/azure-ai-projects/tests/conftest.py @@ -169,9 +169,22 @@ def sanitize_url_paths(): # Sanitize API key from service response (this includes Application Insights connection string) add_body_key_sanitizer(json_path="credentials.key", value="sanitized-api-key") - # Sanitize SAS URI from Datasets get credential response - add_body_key_sanitizer(json_path="blobReference.credential.sasUri", value="sanitized-sas-uri") - add_body_key_sanitizer(json_path="blobReferenceForConsumption.credential.sasUri", value="sanitized-sas-uri") + # Sanitize GitHub personal access tokens that may appear in connection credentials + add_general_regex_sanitizer(regex=r"github_pat_[A-Za-z0-9_]+", value="sanitized-github-pat") + add_body_key_sanitizer( + json_path="$..authorization", + value="Bearer sanitized-github-pat", + regex=r"(?i)^Bearer\s+github_pat_[A-Za-z0-9_]+$", + ) + + # Sanitize Azure Blob account host while preserving container path and SAS shape. + # This avoids creating inconsistent recordings where sasUri points to a different + # container than the corresponding blob RequestUri entries. + add_general_regex_sanitizer( + regex=r"https://([a-z0-9-]+)\.blob\.core\.windows\.net", + value="Sanitized", + group_for_replace="1", + ) add_body_key_sanitizer( json_path="$..project_connection_id", @@ -183,7 +196,7 @@ def sanitize_url_paths(): add_body_key_sanitizer( json_path="$.input", value="sanitized-print-output", - regex=r"print contents array = .*", + regex=r"(?s)print contents array = .*", ) # Remove Stainless headers from OpenAI client requests, since they include platform and OS specific info, which we can't have in recorded requests. diff --git a/sdk/ai/azure-ai-projects/tests/samples/test_samples.py b/sdk/ai/azure-ai-projects/tests/samples/test_samples.py index 9a5ccdb60b6f..e45524f5d6a7 100644 --- a/sdk/ai/azure-ai-projects/tests/samples/test_samples.py +++ b/sdk/ai/azure-ai-projects/tests/samples/test_samples.py @@ -16,6 +16,7 @@ agent_tools_instructions, agents_instructions, memories_instructions, + resource_management_instructions, get_sample_environment_variables_map, ) @@ -47,7 +48,6 @@ class TestSamples(AzureRecordedTestCase): "sample_agent_azure_function.py", "sample_agent_computer_use.py", "sample_agent_browser_automation.py", - "sample_agent_openapi.py", ], ), ) @@ -102,3 +102,88 @@ def test_agents_samples(self, sample_path: str, **kwargs) -> None: project_endpoint=kwargs["azure_ai_project_endpoint"], model=kwargs["azure_ai_model_deployment_name"], ) + + @pytest.mark.parametrize( + "sample_path", + get_sample_paths( + "connections", + samples_to_skip=[], + ), + ) + @servicePreparer() + @SamplePathPasser() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_connections_samples(self, sample_path: str, **kwargs) -> None: + kwargs = kwargs.copy() + kwargs["connection_name"] = "mcp" + env_var_mapping = get_sample_environment_variables_map(kwargs) + executor = SyncSampleExecutor(self, sample_path, env_var_mapping=env_var_mapping, **kwargs) + executor.execute() + executor.validate_print_calls_by_llm( + instructions=resource_management_instructions, + project_endpoint=kwargs["azure_ai_project_endpoint"], + model=kwargs["azure_ai_model_deployment_name"], + ) + + @pytest.mark.parametrize( + "sample_path", + get_sample_paths( + "files", + samples_to_skip=[], + ), + ) + @servicePreparer() + @SamplePathPasser() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_files_samples(self, sample_path: str, **kwargs) -> None: + env_var_mapping = get_sample_environment_variables_map(kwargs) + executor = SyncSampleExecutor(self, sample_path, env_var_mapping=env_var_mapping, **kwargs) + executor.execute() + executor.validate_print_calls_by_llm( + instructions=resource_management_instructions, + project_endpoint=kwargs["azure_ai_project_endpoint"], + model=kwargs["azure_ai_model_deployment_name"], + ) + + @pytest.mark.parametrize( + "sample_path", + get_sample_paths( + "deployments", + samples_to_skip=[], + ), + ) + @servicePreparer() + @SamplePathPasser() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_deployments_samples(self, sample_path: str, **kwargs) -> None: + env_var_mapping = get_sample_environment_variables_map(kwargs) + executor = SyncSampleExecutor(self, sample_path, env_var_mapping=env_var_mapping, **kwargs) + executor.execute() + executor.validate_print_calls_by_llm( + instructions=resource_management_instructions, + project_endpoint=kwargs["azure_ai_project_endpoint"], + model=kwargs["azure_ai_model_deployment_name"], + ) + + @pytest.mark.parametrize( + "sample_path", + get_sample_paths( + "datasets", + samples_to_skip=[], + ), + ) + @servicePreparer() + @SamplePathPasser() + @recorded_by_proxy(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + def test_datasets_samples(self, sample_path: str, **kwargs) -> None: + env_var_mapping = get_sample_environment_variables_map(kwargs) + executor = SyncSampleExecutor(self, sample_path, env_var_mapping=env_var_mapping, **kwargs) + executor.execute() + if self.is_live: + # Don't replay LLM validation since there probably a defect in proxy server fail to replay + # Proxy server probably not able to parse the captured print content + executor.validate_print_calls_by_llm( + instructions=resource_management_instructions, + project_endpoint=kwargs["azure_ai_project_endpoint"], + model=kwargs["azure_ai_model_deployment_name"], + ) diff --git a/sdk/ai/azure-ai-projects/tests/samples/test_samples_async.py b/sdk/ai/azure-ai-projects/tests/samples/test_samples_async.py index 70ba0cd9225a..5745e61b004c 100644 --- a/sdk/ai/azure-ai-projects/tests/samples/test_samples_async.py +++ b/sdk/ai/azure-ai-projects/tests/samples/test_samples_async.py @@ -16,6 +16,7 @@ agent_tools_instructions, memories_instructions, agents_instructions, + resource_management_instructions, get_sample_environment_variables_map, ) @@ -89,3 +90,88 @@ async def test_agents_samples(self, sample_path: str, **kwargs) -> None: project_endpoint=kwargs["azure_ai_project_endpoint"], model=kwargs["azure_ai_model_deployment_name"], ) + + @pytest.mark.parametrize( + "sample_path", + get_async_sample_paths( + "connections", + samples_to_skip=[], + ), + ) + @servicePreparer() + @SamplePathPasser() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_connections_samples(self, sample_path: str, **kwargs) -> None: + kwargs = kwargs.copy() + kwargs["connection_name"] = "mcp" + env_var_mapping = get_sample_environment_variables_map(kwargs) + executor = AsyncSampleExecutor(self, sample_path, env_var_mapping=env_var_mapping, **kwargs) + await executor.execute_async() + await executor.validate_print_calls_by_llm_async( + instructions=resource_management_instructions, + project_endpoint=kwargs["azure_ai_project_endpoint"], + model=kwargs["azure_ai_model_deployment_name"], + ) + + @pytest.mark.parametrize( + "sample_path", + get_async_sample_paths( + "files", + samples_to_skip=[], + ), + ) + @servicePreparer() + @SamplePathPasser() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_files_samples(self, sample_path: str, **kwargs) -> None: + env_var_mapping = get_sample_environment_variables_map(kwargs) + executor = AsyncSampleExecutor(self, sample_path, env_var_mapping=env_var_mapping, **kwargs) + await executor.execute_async() + await executor.validate_print_calls_by_llm_async( + instructions=resource_management_instructions, + project_endpoint=kwargs["azure_ai_project_endpoint"], + model=kwargs["azure_ai_model_deployment_name"], + ) + + @pytest.mark.parametrize( + "sample_path", + get_async_sample_paths( + "deployments", + samples_to_skip=[], + ), + ) + @servicePreparer() + @SamplePathPasser() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_deployments_samples(self, sample_path: str, **kwargs) -> None: + env_var_mapping = get_sample_environment_variables_map(kwargs) + executor = AsyncSampleExecutor(self, sample_path, env_var_mapping=env_var_mapping, **kwargs) + await executor.execute_async() + await executor.validate_print_calls_by_llm_async( + instructions=resource_management_instructions, + project_endpoint=kwargs["azure_ai_project_endpoint"], + model=kwargs["azure_ai_model_deployment_name"], + ) + + @pytest.mark.parametrize( + "sample_path", + get_async_sample_paths( + "datasets", + samples_to_skip=[], + ), + ) + @servicePreparer() + @SamplePathPasser() + @recorded_by_proxy_async(RecordedTransport.AZURE_CORE, RecordedTransport.HTTPX) + async def test_datasets_samples(self, sample_path: str, **kwargs) -> None: + env_var_mapping = get_sample_environment_variables_map(kwargs) + executor = AsyncSampleExecutor(self, sample_path, env_var_mapping=env_var_mapping, **kwargs) + await executor.execute_async() + if self.is_live: + # Don't replay LLM validation since there probably a defect in proxy server fail to replay + # Proxy server probably not able to parse the captured print content + await executor.validate_print_calls_by_llm_async( + instructions=resource_management_instructions, + project_endpoint=kwargs["azure_ai_project_endpoint"], + model=kwargs["azure_ai_model_deployment_name"], + ) diff --git a/sdk/ai/azure-ai-projects/tests/samples/test_samples_helpers.py b/sdk/ai/azure-ai-projects/tests/samples/test_samples_helpers.py index 234f2ee00f2f..e5417bd63911 100644 --- a/sdk/ai/azure-ai-projects/tests/samples/test_samples_helpers.py +++ b/sdk/ai/azure-ai-projects/tests/samples/test_samples_helpers.py @@ -86,6 +86,32 @@ Always include `reason` with a concise explanation tied to the observed print output.""" +resource_management_instructions = """We just ran Python code and captured a Python array of print statements. +Validate whether sample execution/output is correct for resource-management samples (for example +connections, files, and deployments). + +Successful output typically shows one or more of: +- Create/get/list/update/delete operations completing as expected. +- Returned resource objects/IDs/names/versions or other meaningful operation results. +- Consistent progress from setup to cleanup where applicable. + +Mark `correct = false` for: +- Exceptions, stack traces, explicit error/failure messages. +- Timeout/auth/connection/service errors that prevent normal completion. +- Malformed/corrupted output indicating broken processing. +- Operation failures where the sample cannot proceed as designed. + +Important distinction: +- Empty list results by themselves can be valid and should not automatically fail. +- Cleanup/delete operations that report not found may still be acceptable if the sample otherwise succeeds. +- But explicit inability/failure for required core operations should be marked `correct = false`. + +Mark `correct = true` when execution succeeds and output is consistent with the sample's intended +resource-management behavior. + +Always include `reason` with a concise explanation tied to the observed print output.""" + + def get_sample_environment_variables_map(env_kwargs: Mapping[str, Any]) -> dict[str, str]: # Map sample env-var names (uppercase) to the original kwargs key names so executors can pop them. mapping: dict[str, str] = {}