From 46b410f69864b88fa8db7cdf38429acfe04f1d96 Mon Sep 17 00:00:00 2001 From: sagemaker-bot Date: Fri, 15 May 2026 10:54:11 +0000 Subject: [PATCH] Daily Sync with Botocore v1.43.8 on 2026/05/15 --- sample/sagemaker/2017-07-24/service-2.json | 1828 ++++++++++++++++- .../main/code_injection/shape_dag.py | 740 ++++++- src/sagemaker_core/main/config_schema.py | 37 + src/sagemaker_core/main/resources.py | 1161 ++++++++++- src/sagemaker_core/main/shapes.py | 681 +++++- src/sagemaker_core/tools/api_coverage.json | 2 +- 6 files changed, 4396 insertions(+), 53 deletions(-) diff --git a/sample/sagemaker/2017-07-24/service-2.json b/sample/sagemaker/2017-07-24/service-2.json index cc75fd2..2ae0306 100644 --- a/sample/sagemaker/2017-07-24/service-2.json +++ b/sample/sagemaker/2017-07-24/service-2.json @@ -130,6 +130,50 @@ ], "documentation":"

Replaces specific nodes within a SageMaker HyperPod cluster with new hardware. BatchReplaceClusterNodes terminates the specified instances and provisions replacement instances on fresh hardware; the Amazon Machine Image (AMI) and instance configuration remain unchanged.

This operation is useful for recovering from hardware failures or persistent issues that cannot be resolved through a reboot.

" }, + "CreateAIBenchmarkJob":{ + "name":"CreateAIBenchmarkJob", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"CreateAIBenchmarkJobRequest"}, + "output":{"shape":"CreateAIBenchmarkJobResponse"}, + "errors":[ + {"shape":"ResourceNotFound"}, + {"shape":"ResourceInUse"}, + {"shape":"ResourceLimitExceeded"} + ], + "documentation":"

Creates a benchmark job that runs performance benchmarks against inference infrastructure using a predefined AI workload configuration. The benchmark job measures metrics such as latency, throughput, and cost for your generative AI inference endpoints.
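A minimal boto3 sketch of this call, assuming a botocore build that includes this synced model (v1.43.8 per the patch subject); the job name, endpoint, bucket, workload config, and role ARN are all placeholders:

import boto3

# Assumes botocore >= 1.43.8 so that create_ai_benchmark_job is generated
# from this service model. All identifiers below are placeholders.
sm = boto3.client("sagemaker")

response = sm.create_ai_benchmark_job(
    AIBenchmarkJobName="my-benchmark-job",
    # BenchmarkTarget is a union type; Endpoint is the only member defined here.
    BenchmarkTarget={"Endpoint": {"Identifier": "my-endpoint"}},
    OutputConfig={"S3OutputLocation": "s3://my-bucket/benchmarks/"},
    AIWorkloadConfigIdentifier="my-workload-config",
    RoleArn="arn:aws:iam::111122223333:role/SageMakerExecutionRole",
)
print(response["AIBenchmarkJobArn"])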

" + }, + "CreateAIRecommendationJob":{ + "name":"CreateAIRecommendationJob", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"CreateAIRecommendationJobRequest"}, + "output":{"shape":"CreateAIRecommendationJobResponse"}, + "errors":[ + {"shape":"ResourceNotFound"}, + {"shape":"ResourceInUse"}, + {"shape":"ResourceLimitExceeded"} + ], + "documentation":"

Creates a recommendation job that generates intelligent optimization recommendations for generative AI inference deployments. The job analyzes your model, workload configuration, and performance targets to recommend optimal instance types, model optimization techniques (such as quantization and speculative decoding), and deployment configurations.
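A comparable sketch for the recommendation job, under the same botocore assumption; the constraint uses the ttft-ms metric from the AIRecommendationMetric enum defined below, and all names are placeholders:

import boto3

sm = boto3.client("sagemaker")  # assumes a botocore model with this operation

response = sm.create_ai_recommendation_job(
    AIRecommendationJobName="my-recommendation-job",
    # ModelSource is a union type; S3 is the member defined in this model.
    ModelSource={"S3": {"S3Uri": "s3://my-bucket/model-artifacts/"}},
    OutputConfig={"S3OutputLocation": "s3://my-bucket/recommendations/"},
    AIWorkloadConfigIdentifier="my-workload-config",
    # Valid metrics per the model: ttft-ms, throughput, cost.
    PerformanceTarget={"Constraints": [{"Metric": "ttft-ms"}]},
    RoleArn="arn:aws:iam::111122223333:role/SageMakerExecutionRole",
)
print(response["AIRecommendationJobArn"])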

" + }, + "CreateAIWorkloadConfig":{ + "name":"CreateAIWorkloadConfig", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"CreateAIWorkloadConfigRequest"}, + "output":{"shape":"CreateAIWorkloadConfigResponse"}, + "errors":[ + {"shape":"ResourceInUse"}, + {"shape":"ResourceLimitExceeded"} + ], + "documentation":"

Creates a reusable AI workload configuration that defines datasets, data sources, and benchmark tool settings for consistent performance testing of generative AI inference deployments on Amazon SageMaker AI.
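A sketch of creating the reusable workload configuration; only AIWorkloadConfigName is required, and the WorkloadSpec value is assumed to be an inline YAML or JSON string per the request documentation below (contents elided):

import boto3

sm = boto3.client("sagemaker")  # assumes a botocore model with this operation

response = sm.create_ai_workload_config(
    AIWorkloadConfigName="my-workload-config",
    # DatasetConfig is a union type; InputDataConfig is the member defined here.
    DatasetConfig={
        "InputDataConfig": [
            {
                "ChannelName": "prompts",
                "DataSource": {"S3DataSource": {"S3Uri": "s3://my-bucket/datasets/prompts/"}},
            }
        ]
    },
    # WorkloadSpec is assumed to be an inline YAML/JSON string; contents elided.
    AIWorkloadConfigs={"WorkloadSpec": "..."},
)
print(response["AIWorkloadConfigArn"])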

" + }, "CreateAction":{ "name":"CreateAction", "http":{ @@ -1043,6 +1087,46 @@ ], "documentation":"

Creates a new work team for labeling your data. A work team is defined by one or more Amazon Cognito user pools. You must first create the user pools before you can create a work team.

You cannot create more than 25 work teams in an account and region.

" }, + "DeleteAIBenchmarkJob":{ + "name":"DeleteAIBenchmarkJob", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"DeleteAIBenchmarkJobRequest"}, + "output":{"shape":"DeleteAIBenchmarkJobResponse"}, + "errors":[ + {"shape":"ResourceNotFound"} + ], + "documentation":"

Deletes the specified AI benchmark job.

" + }, + "DeleteAIRecommendationJob":{ + "name":"DeleteAIRecommendationJob", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"DeleteAIRecommendationJobRequest"}, + "output":{"shape":"DeleteAIRecommendationJobResponse"}, + "errors":[ + {"shape":"ResourceNotFound"} + ], + "documentation":"

Deletes the specified AI recommendation job.

" + }, + "DeleteAIWorkloadConfig":{ + "name":"DeleteAIWorkloadConfig", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"DeleteAIWorkloadConfigRequest"}, + "output":{"shape":"DeleteAIWorkloadConfigResponse"}, + "errors":[ + {"shape":"ResourceNotFound"}, + {"shape":"ResourceInUse"} + ], + "documentation":"

Deletes the specified AI workload configuration. You cannot delete a configuration that is referenced by an active benchmark job.

" + }, "DeleteAction":{ "name":"DeleteAction", "http":{ @@ -1752,6 +1836,45 @@ "input":{"shape":"DeregisterDevicesRequest"}, "documentation":"

Deregisters the specified devices. After you deregister a device, you must re-register it.

" }, + "DescribeAIBenchmarkJob":{ + "name":"DescribeAIBenchmarkJob", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"DescribeAIBenchmarkJobRequest"}, + "output":{"shape":"DescribeAIBenchmarkJobResponse"}, + "errors":[ + {"shape":"ResourceNotFound"} + ], + "documentation":"

Returns details of an AI benchmark job, including its status, configuration, target endpoint, and timing information.
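A polling sketch against this operation; no waiter is assumed to exist for the new API, so the loop checks the terminal values of the AIBenchmarkJobStatus enum directly:

import time

import boto3

sm = boto3.client("sagemaker")  # assumes a botocore model with this operation

# Poll until the job reaches a terminal status (Completed, Failed, or Stopped).
while True:
    desc = sm.describe_ai_benchmark_job(AIBenchmarkJobName="my-benchmark-job")
    status = desc["AIBenchmarkJobStatus"]
    if status in ("Completed", "Failed", "Stopped"):
        break
    time.sleep(30)

print(status, desc["OutputConfig"]["S3OutputLocation"])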

" + }, + "DescribeAIRecommendationJob":{ + "name":"DescribeAIRecommendationJob", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"DescribeAIRecommendationJobRequest"}, + "output":{"shape":"DescribeAIRecommendationJobResponse"}, + "errors":[ + {"shape":"ResourceNotFound"} + ], + "documentation":"

Returns details of an AI recommendation job, including its status, model source, performance targets, optimization recommendations, and deployment configurations.

" + }, + "DescribeAIWorkloadConfig":{ + "name":"DescribeAIWorkloadConfig", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"DescribeAIWorkloadConfigRequest"}, + "output":{"shape":"DescribeAIWorkloadConfigResponse"}, + "errors":[ + {"shape":"ResourceNotFound"} + ], + "documentation":"

Returns details of an AI workload configuration, including the dataset configuration, benchmark tool settings, tags, and creation time.

" + }, "DescribeAction":{ "name":"DescribeAction", "http":{ @@ -2774,6 +2897,36 @@ ], "documentation":"

Import hub content.

" }, + "ListAIBenchmarkJobs":{ + "name":"ListAIBenchmarkJobs", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"ListAIBenchmarkJobsRequest"}, + "output":{"shape":"ListAIBenchmarkJobsResponse"}, + "documentation":"

Returns a list of AI benchmark jobs in your account. You can filter the results by name, status, and creation time, and sort the results. The response is paginated.
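A manual NextToken pagination sketch; a registered boto3 paginator is not assumed to exist for this new operation, so the loop follows the token by hand:

import boto3

sm = boto3.client("sagemaker")  # assumes a botocore model with this operation

kwargs = {"StatusEquals": "Completed", "SortBy": "CreationTime", "SortOrder": "Descending"}
while True:
    page = sm.list_ai_benchmark_jobs(**kwargs)
    for job in page["AIBenchmarkJobs"]:
        print(job["AIBenchmarkJobName"], job["AIBenchmarkJobStatus"])
    token = page.get("NextToken")
    if not token:
        break
    kwargs["NextToken"] = token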

" + }, + "ListAIRecommendationJobs":{ + "name":"ListAIRecommendationJobs", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"ListAIRecommendationJobsRequest"}, + "output":{"shape":"ListAIRecommendationJobsResponse"}, + "documentation":"

Returns a list of AI recommendation jobs in your account. You can filter the results by name, status, and creation time, and sort the results. The response is paginated.

" + }, + "ListAIWorkloadConfigs":{ + "name":"ListAIWorkloadConfigs", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"ListAIWorkloadConfigsRequest"}, + "output":{"shape":"ListAIWorkloadConfigsResponse"}, + "documentation":"

Returns a list of AI workload configurations in your account. You can filter the results by name and creation time, and sort the results. The response is paginated.

" + }, "ListActions":{ "name":"ListActions", "http":{ @@ -3921,6 +4074,32 @@ ], "documentation":"

Initiates a remote connection session between a local integrated development environment (IDE) and a remote SageMaker space.

" }, + "StopAIBenchmarkJob":{ + "name":"StopAIBenchmarkJob", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"StopAIBenchmarkJobRequest"}, + "output":{"shape":"StopAIBenchmarkJobResponse"}, + "errors":[ + {"shape":"ResourceNotFound"} + ], + "documentation":"

Stops a running AI benchmark job.

" + }, + "StopAIRecommendationJob":{ + "name":"StopAIRecommendationJob", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"StopAIRecommendationJobRequest"}, + "output":{"shape":"StopAIRecommendationJobResponse"}, + "errors":[ + {"shape":"ResourceNotFound"} + ], + "documentation":"

Stops a running AI recommendation job.

" + }, "StopAutoMLJob":{ "name":"StopAutoMLJob", "http":{ @@ -4728,6 +4907,719 @@ } }, "shapes":{ + "AIBenchmarkEndpoint":{ + "type":"structure", + "required":["Identifier"], + "members":{ + "Identifier":{ + "shape":"AIResourceIdentifier", + "documentation":"

The name or Amazon Resource Name (ARN) of the SageMaker endpoint to benchmark.

" + }, + "TargetContainerHostname":{ + "shape":"String", + "documentation":"

The hostname of the specific container to target within a multi-container endpoint.

" + }, + "InferenceComponents":{ + "shape":"AIBenchmarkInferenceComponentList", + "documentation":"

The list of inference components to benchmark on the endpoint.

" + } + }, + "documentation":"

The SageMaker endpoint configuration for benchmarking.

" + }, + "AIBenchmarkInferenceComponent":{ + "type":"structure", + "required":["Identifier"], + "members":{ + "Identifier":{ + "shape":"AIResourceIdentifier", + "documentation":"

The name or Amazon Resource Name (ARN) of the inference component.

" + } + }, + "documentation":"

An inference component to benchmark.

" + }, + "AIBenchmarkInferenceComponentList":{ + "type":"list", + "member":{"shape":"AIBenchmarkInferenceComponent"} + }, + "AIBenchmarkJobArn":{ + "type":"string", + "max":256, + "min":0, + "pattern":"arn:aws[a-z\\-]*:sagemaker:[a-z0-9\\-]*:[0-9]{12}:ai-benchmark-job/[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}" + }, + "AIBenchmarkJobStatus":{ + "type":"string", + "enum":[ + "InProgress", + "Completed", + "Failed", + "Stopping", + "Stopped" + ] + }, + "AIBenchmarkJobSummary":{ + "type":"structure", + "required":[ + "AIBenchmarkJobName", + "AIBenchmarkJobArn", + "AIBenchmarkJobStatus", + "CreationTime" + ], + "members":{ + "AIBenchmarkJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the benchmark job.

" + }, + "AIBenchmarkJobArn":{ + "shape":"AIBenchmarkJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the benchmark job.

" + }, + "AIBenchmarkJobStatus":{ + "shape":"AIBenchmarkJobStatus", + "documentation":"

The status of the benchmark job.

" + }, + "CreationTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the benchmark job was created.

" + }, + "EndTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the benchmark job completed.

" + }, + "AIWorkloadConfigName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI workload configuration used by the benchmark job.

" + } + }, + "documentation":"

Summary information about an AI benchmark job.

" + }, + "AIBenchmarkJobSummaryList":{ + "type":"list", + "member":{"shape":"AIBenchmarkJobSummary"} + }, + "AIBenchmarkNetworkConfig":{ + "type":"structure", + "members":{ + "VpcConfig":{ + "shape":"VpcConfig", + "documentation":"

The VPC configuration, including security group IDs and subnet IDs.

" + } + }, + "documentation":"

The network configuration for an AI benchmark job.

" + }, + "AIBenchmarkOutputConfig":{ + "type":"structure", + "required":["S3OutputLocation"], + "members":{ + "S3OutputLocation":{ + "shape":"S3Uri", + "documentation":"

The Amazon S3 URI where benchmark results are stored.

" + } + }, + "documentation":"

The output configuration for an AI benchmark job.

" + }, + "AIBenchmarkOutputResult":{ + "type":"structure", + "required":["S3OutputLocation"], + "members":{ + "S3OutputLocation":{ + "shape":"S3Uri", + "documentation":"

The Amazon S3 URI where benchmark results are stored.

" + }, + "CloudWatchLogs":{ + "shape":"AICloudWatchLogsList", + "documentation":"

The CloudWatch log information for the benchmark job.

" + } + }, + "documentation":"

The output result of an AI benchmark job, including the Amazon S3 location and CloudWatch log information.

" + }, + "AIBenchmarkTarget":{ + "type":"structure", + "members":{ + "Endpoint":{ + "shape":"AIBenchmarkEndpoint", + "documentation":"

The SageMaker endpoint to benchmark.

" + } + }, + "documentation":"

The target for an AI benchmark job. This is a union type; specify exactly one of the members.

", + "union":true + }, + "AICapacityReservationConfig":{ + "type":"structure", + "members":{ + "CapacityReservationPreference":{ + "shape":"AICapacityReservationPreference", + "documentation":"

The capacity reservation preference. The only valid value is capacity-reservations-only.

" + }, + "MlReservationArns":{ + "shape":"AIMlReservationArnList", + "documentation":"

The list of ML reservation ARNs to use.

" + } + }, + "documentation":"

The capacity reservation configuration for an AI recommendation job.

" + }, + "AICapacityReservationPreference":{ + "type":"string", + "enum":["capacity-reservations-only"] + }, + "AIChannelName":{ + "type":"string", + "max":64, + "min":1, + "pattern":"[A-Za-z0-9\\.\\-_]+" + }, + "AICloudWatchLogs":{ + "type":"structure", + "members":{ + "LogGroupArn":{ + "shape":"String", + "documentation":"

The Amazon Resource Name (ARN) of the CloudWatch log group.

" + }, + "LogStreamName":{ + "shape":"String", + "documentation":"

The name of the CloudWatch log stream.

" + } + }, + "documentation":"

CloudWatch log information for an AI benchmark or recommendation job.

" + }, + "AICloudWatchLogsList":{ + "type":"list", + "member":{"shape":"AICloudWatchLogs"} + }, + "AIDatasetConfig":{ + "type":"structure", + "members":{ + "InputDataConfig":{ + "shape":"AIWorkloadInputDataConfigList", + "documentation":"

An array of input data channel configurations for the workload.

" + } + }, + "documentation":"

The dataset configuration for an AI workload. This is a union type; specify exactly one of the members.

", + "union":true + }, + "AIEntityName":{ + "type":"string", + "max":63, + "min":1, + "pattern":"[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}" + }, + "AIInferenceSpecificationName":{ + "type":"string", + "max":63, + "min":0, + "pattern":"[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}" + }, + "AIMlReservationArn":{ + "type":"string", + "max":256, + "min":0, + "pattern":"arn:aws[a-z\\-]*:sagemaker:[a-z0-9\\-]*:[0-9]{12}:[a-z0-9\\-]{1,14}/.*" + }, + "AIMlReservationArnList":{ + "type":"list", + "member":{"shape":"AIMlReservationArn"} + }, + "AIModelSource":{ + "type":"structure", + "members":{ + "S3":{ + "shape":"AIModelSourceS3", + "documentation":"

The Amazon S3 location of the model artifacts.

" + } + }, + "documentation":"

The source of the model for an AI recommendation job. This is a union type.

", + "union":true + }, + "AIModelSourceS3":{ + "type":"structure", + "members":{ + "S3Uri":{ + "shape":"S3Uri", + "documentation":"

The Amazon S3 URI of the model artifacts.

" + } + }, + "documentation":"

The Amazon S3 model source configuration.

" + }, + "AIRecommendation":{ + "type":"structure", + "members":{ + "RecommendationDescription":{ + "shape":"String", + "documentation":"

A description of the recommendation.

" + }, + "OptimizationDetails":{ + "shape":"AIRecommendationOptimizationDetailList", + "documentation":"

The optimization techniques applied in this recommendation.

" + }, + "ModelDetails":{ + "shape":"AIRecommendationModelDetails", + "documentation":"

Details about the model package associated with this recommendation.

" + }, + "DeploymentConfiguration":{ + "shape":"AIRecommendationDeploymentConfiguration", + "documentation":"

The deployment configuration for this recommendation, including the container image, instance type, instance count, and environment variables.

" + }, + "AIBenchmarkJobArn":{ + "shape":"AIBenchmarkJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the benchmark job associated with this recommendation.

" + }, + "ExpectedPerformance":{ + "shape":"ExpectedPerformanceList", + "documentation":"

The expected performance metrics for this recommendation.

" + } + }, + "documentation":"

An optimization recommendation generated by an AI recommendation job.

" + }, + "AIRecommendationAllowOptimization":{ + "type":"boolean", + "box":true + }, + "AIRecommendationComputeSpec":{ + "type":"structure", + "members":{ + "InstanceTypes":{ + "shape":"AIRecommendationInstanceTypeList", + "documentation":"

The list of instance types to consider for recommendations. You can specify up to 3 instance types.

" + }, + "CapacityReservationConfig":{ + "shape":"AICapacityReservationConfig", + "documentation":"

The capacity reservation configuration.

" + } + }, + "documentation":"

The compute resource specification for an AI recommendation job.

" + }, + "AIRecommendationConstraint":{ + "type":"structure", + "required":["Metric"], + "members":{ + "Metric":{ + "shape":"AIRecommendationMetric", + "documentation":"

The performance metric. Valid values are ttft-ms (time to first token in milliseconds), throughput, and cost.

" + } + }, + "documentation":"

A performance constraint for an AI recommendation job.

" + }, + "AIRecommendationConstraintList":{ + "type":"list", + "member":{"shape":"AIRecommendationConstraint"} + }, + "AIRecommendationCopyCountPerInstance":{ + "type":"integer", + "box":true + }, + "AIRecommendationDeploymentConfiguration":{ + "type":"structure", + "members":{ + "S3":{ + "shape":"AIRecommendationDeploymentS3ChannelList", + "documentation":"

The Amazon S3 data channels for the deployment.

" + }, + "ImageUri":{ + "shape":"String", + "documentation":"

The URI of the container image for the deployment.

" + }, + "InstanceType":{ + "shape":"AIRecommendationInstanceType", + "documentation":"

The recommended instance type for the deployment.

" + }, + "InstanceCount":{ + "shape":"AIRecommendationInstanceCount", + "documentation":"

The recommended number of instances for the deployment.

" + }, + "CopyCountPerInstance":{ + "shape":"AIRecommendationCopyCountPerInstance", + "documentation":"

The number of model copies per instance.

" + }, + "EnvironmentVariables":{ + "shape":"EnvironmentMap", + "documentation":"

The environment variables for the deployment.

" + } + }, + "documentation":"

The deployment configuration for a recommendation.

" + }, + "AIRecommendationDeploymentS3Channel":{ + "type":"structure", + "members":{ + "ChannelName":{ + "shape":"AIChannelName", + "documentation":"

A custom name for this Amazon S3 data channel.

" + }, + "Uri":{ + "shape":"S3Uri", + "documentation":"

The Amazon S3 URI of the data for this channel.

" + } + }, + "documentation":"

An Amazon S3 data channel for a recommended deployment configuration, containing model artifacts or optimized model outputs.

" + }, + "AIRecommendationDeploymentS3ChannelList":{ + "type":"list", + "member":{"shape":"AIRecommendationDeploymentS3Channel"} + }, + "AIRecommendationInferenceFramework":{ + "type":"string", + "enum":[ + "LMI", + "VLLM" + ] + }, + "AIRecommendationInferenceSpecification":{ + "type":"structure", + "members":{ + "Framework":{ + "shape":"AIRecommendationInferenceFramework", + "documentation":"

The inference framework. Valid values are LMI and VLLM.

" + } + }, + "documentation":"

The inference framework for an AI recommendation job.

" + }, + "AIRecommendationInstanceCount":{ + "type":"integer", + "box":true + }, + "AIRecommendationInstanceDetail":{ + "type":"structure", + "members":{ + "InstanceType":{ + "shape":"AIRecommendationInstanceType", + "documentation":"

The recommended instance type.

" + }, + "InstanceCount":{ + "shape":"AIRecommendationInstanceCount", + "documentation":"

The recommended number of instances.

" + }, + "CopyCountPerInstance":{ + "shape":"AIRecommendationCopyCountPerInstance", + "documentation":"

The number of model copies per instance.

" + } + }, + "documentation":"

Instance details for a recommendation.

" + }, + "AIRecommendationInstanceDetailList":{ + "type":"list", + "member":{"shape":"AIRecommendationInstanceDetail"} + }, + "AIRecommendationInstanceType":{ + "type":"string", + "enum":[ + "ml.g5.xlarge", + "ml.g5.2xlarge", + "ml.g5.4xlarge", + "ml.g5.8xlarge", + "ml.g5.12xlarge", + "ml.g5.16xlarge", + "ml.g5.24xlarge", + "ml.g5.48xlarge", + "ml.g6.xlarge", + "ml.g6.2xlarge", + "ml.g6.4xlarge", + "ml.g6.8xlarge", + "ml.g6.12xlarge", + "ml.g6.16xlarge", + "ml.g6.24xlarge", + "ml.g6.48xlarge", + "ml.g6e.xlarge", + "ml.g6e.2xlarge", + "ml.g6e.4xlarge", + "ml.g6e.8xlarge", + "ml.g6e.12xlarge", + "ml.g6e.16xlarge", + "ml.g6e.24xlarge", + "ml.g6e.48xlarge", + "ml.g7e.2xlarge", + "ml.g7e.4xlarge", + "ml.g7e.8xlarge", + "ml.g7e.12xlarge", + "ml.g7e.24xlarge", + "ml.g7e.48xlarge", + "ml.p3.2xlarge", + "ml.p3.8xlarge", + "ml.p3.16xlarge", + "ml.p4d.24xlarge", + "ml.p4de.24xlarge", + "ml.p5.4xlarge", + "ml.p5.48xlarge", + "ml.p5e.48xlarge", + "ml.p5en.48xlarge" + ] + }, + "AIRecommendationInstanceTypeList":{ + "type":"list", + "member":{"shape":"AIRecommendationInstanceType"}, + "max":3, + "min":0 + }, + "AIRecommendationJobArn":{ + "type":"string", + "max":256, + "min":0, + "pattern":"arn:aws[a-z\\-]*:sagemaker:[a-z0-9\\-]*:[0-9]{12}:ai-recommendation-job/[a-zA-Z0-9](-*[a-zA-Z0-9]){0,62}" + }, + "AIRecommendationJobStatus":{ + "type":"string", + "enum":[ + "InProgress", + "Completed", + "Failed", + "Stopping", + "Stopped" + ] + }, + "AIRecommendationJobSummary":{ + "type":"structure", + "required":[ + "AIRecommendationJobName", + "AIRecommendationJobArn", + "AIRecommendationJobStatus", + "CreationTime" + ], + "members":{ + "AIRecommendationJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the recommendation job.

" + }, + "AIRecommendationJobArn":{ + "shape":"AIRecommendationJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the recommendation job.

" + }, + "AIRecommendationJobStatus":{ + "shape":"AIRecommendationJobStatus", + "documentation":"

The status of the recommendation job.

" + }, + "CreationTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the recommendation job was created.

" + }, + "EndTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the recommendation job completed.

" + } + }, + "documentation":"

Summary information about an AI recommendation job.

" + }, + "AIRecommendationJobSummaryList":{ + "type":"list", + "member":{"shape":"AIRecommendationJobSummary"} + }, + "AIRecommendationList":{ + "type":"list", + "member":{"shape":"AIRecommendation"} + }, + "AIRecommendationMetric":{ + "type":"string", + "enum":[ + "ttft-ms", + "throughput", + "cost" + ] + }, + "AIRecommendationModelDetails":{ + "type":"structure", + "members":{ + "ModelPackageArn":{ + "shape":"ModelPackageArn", + "documentation":"

The Amazon Resource Name (ARN) of the model package.

" + }, + "InferenceSpecificationName":{ + "shape":"AIInferenceSpecificationName", + "documentation":"

The name of the inference specification within the model package.

" + }, + "InstanceDetails":{ + "shape":"AIRecommendationInstanceDetailList", + "documentation":"

The instance details for this recommendation, including instance type, count, and model copies per instance.

" + } + }, + "documentation":"

Details about the model package in a recommendation.

" + }, + "AIRecommendationOptimizationConfigMap":{ + "type":"map", + "key":{"shape":"String"}, + "value":{"shape":"String"} + }, + "AIRecommendationOptimizationDetail":{ + "type":"structure", + "required":["OptimizationType"], + "members":{ + "OptimizationType":{ + "shape":"AIRecommendationOptimizationType", + "documentation":"

The type of optimization. Valid values are SpeculativeDecoding and KernelTuning.

" + }, + "OptimizationConfig":{ + "shape":"AIRecommendationOptimizationConfigMap", + "documentation":"

A map of configuration parameters for the optimization technique.

" + } + }, + "documentation":"

Details about an optimization technique applied in a recommendation.

" + }, + "AIRecommendationOptimizationDetailList":{ + "type":"list", + "member":{"shape":"AIRecommendationOptimizationDetail"} + }, + "AIRecommendationOptimizationType":{ + "type":"string", + "enum":[ + "SpeculativeDecoding", + "KernelTuning" + ] + }, + "AIRecommendationOutputConfig":{ + "type":"structure", + "members":{ + "S3OutputLocation":{ + "shape":"S3Uri", + "documentation":"

The Amazon S3 URI where recommendation results are stored.

" + }, + "ModelPackageGroupIdentifier":{ + "shape":"AIResourceIdentifier", + "documentation":"

The name or Amazon Resource Name (ARN) of the model package group where the optimized model is registered as a new model package version.

" + } + }, + "documentation":"

The output configuration for an AI recommendation job.

" + }, + "AIRecommendationOutputResult":{ + "type":"structure", + "required":["S3OutputLocation"], + "members":{ + "S3OutputLocation":{ + "shape":"S3Uri", + "documentation":"

The Amazon S3 URI where the recommendation job writes its output results.

" + }, + "ModelPackageGroupIdentifier":{ + "shape":"AIResourceIdentifier", + "documentation":"

The name or Amazon Resource Name (ARN) of the model package group where deployment-ready model packages are registered.

" + } + }, + "documentation":"

The output configuration for an AI recommendation job, including the S3 location for results and the model package group for deployment.

" + }, + "AIRecommendationPerformanceMetric":{ + "type":"structure", + "required":[ + "Metric", + "Value" + ], + "members":{ + "Metric":{ + "shape":"String", + "documentation":"

The name of the performance metric.

" + }, + "Stat":{ + "shape":"String", + "documentation":"

The statistical measure for the metric.

" + }, + "Value":{ + "shape":"String", + "documentation":"

The value of the metric.

" + }, + "Unit":{ + "shape":"String", + "documentation":"

The unit of the metric value.

" + } + }, + "documentation":"

An expected performance metric for a recommendation.

" + }, + "AIRecommendationPerformanceTarget":{ + "type":"structure", + "required":["Constraints"], + "members":{ + "Constraints":{ + "shape":"AIRecommendationConstraintList", + "documentation":"

An array of performance constraints that define the optimization objectives.

" + } + }, + "documentation":"

The performance targets for an AI recommendation job.

" + }, + "AIResourceIdentifier":{ + "type":"string", + "max":256, + "min":1, + "pattern":"(arn:aws[a-z\\-]*:sagemaker:[a-z0-9\\-]*:[0-9]{12}:[a-z\\-]*/)?([a-zA-Z0-9]([a-zA-Z0-9\\-]){0,62})(?The name of the AI workload configuration.

" + }, + "AIWorkloadConfigArn":{ + "shape":"AIWorkloadConfigArn", + "documentation":"

The Amazon Resource Name (ARN) of the AI workload configuration.

" + }, + "CreationTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the configuration was created.

" + } + }, + "documentation":"

Summary information about an AI workload configuration.

" + }, + "AIWorkloadConfigSummaryList":{ + "type":"list", + "member":{"shape":"AIWorkloadConfigSummary"} + }, + "AIWorkloadConfigs":{ + "type":"structure", + "required":["WorkloadSpec"], + "members":{ + "WorkloadSpec":{ + "shape":"WorkloadSpec", + "documentation":"

The workload specification that defines benchmark parameters.

" + } + }, + "documentation":"

The benchmark tool configuration for an AI workload.

" + }, + "AIWorkloadDataSource":{ + "type":"structure", + "members":{ + "S3DataSource":{ + "shape":"AIWorkloadS3DataSource", + "documentation":"

The Amazon S3 data source configuration.

" + } + }, + "documentation":"

The data source for an AI workload input data channel.

" + }, + "AIWorkloadInputDataConfig":{ + "type":"structure", + "required":[ + "ChannelName", + "DataSource" + ], + "members":{ + "ChannelName":{ + "shape":"AIChannelName", + "documentation":"

The logical name for the data channel.

" + }, + "DataSource":{ + "shape":"AIWorkloadDataSource", + "documentation":"

The data source for this channel.

" + } + }, + "documentation":"

A channel of input data for an AI workload configuration. Each channel has a name and a data source.

" + }, + "AIWorkloadInputDataConfigList":{ + "type":"list", + "member":{"shape":"AIWorkloadInputDataConfig"} + }, + "AIWorkloadS3DataSource":{ + "type":"structure", + "required":["S3Uri"], + "members":{ + "S3Uri":{ + "shape":"S3Uri", + "documentation":"

The Amazon S3 URI of the data.

" + } + }, + "documentation":"

The Amazon S3 data source for an AI workload.

" + }, "AbsoluteBorrowLimitResourceList":{ "type":"list", "member":{"shape":"ComputeQuotaResourceConfig"}, @@ -5647,7 +6539,8 @@ "ml.r6id.12xlarge", "ml.r6id.16xlarge", "ml.r6id.24xlarge", - "ml.r6id.32xlarge" + "ml.r6id.32xlarge", + "ml.p5.4xlarge" ] }, "AppLifecycleManagement":{ @@ -9090,10 +9983,23 @@ "Description":{ "shape":"String", "documentation":"

A human-readable description of the event.

" + }, + "EventLevel":{ + "shape":"ClusterEventLevel", + "documentation":"

The severity level of the event. Valid values are Info, Warn, and Error.

" } }, "documentation":"

Detailed information about a specific event in a HyperPod cluster.

" }, + "ClusterEventLevel":{ + "type":"string", + "documentation":"

The severity level for a HyperPod cluster event.

", + "enum":[ + "Info", + "Warn", + "Error" + ] + }, "ClusterEventMaxResults":{ "type":"integer", "box":true, @@ -9155,6 +10061,10 @@ "Description":{ "shape":"String", "documentation":"

A brief, human-readable description of the event.

" + }, + "EventLevel":{ + "shape":"ClusterEventLevel", + "documentation":"

The severity level of the event. Valid values are Info, Warn, and Error.

" } }, "documentation":"

A summary of an event in a HyperPod cluster.

" @@ -9202,6 +10112,14 @@ }, "documentation":"

Defines the configuration for attaching an Amazon FSx for OpenZFS file system to instances in a SageMaker HyperPod cluster instance group.

" }, + "ClusterImageVersionStatus":{ + "type":"string", + "documentation":"

The status of the Amazon Machine Image (AMI) version for the HyperPod cluster instance group, node, or cluster. The AMI version is determined at the instance group level, and all nodes within an instance group run the same AMI. The cluster-level status is aggregated across all instance groups.

", + "enum":[ + "UpToDate", + "UpdateAvailable" + ] + }, "ClusterInstanceCount":{ "type":"integer", "box":true, @@ -9287,6 +10205,10 @@ "shape":"ImageId", "documentation":"

The ID of the Amazon Machine Image (AMI) desired for the instance group.

" }, + "ImageVersionStatus":{ + "shape":"ClusterImageVersionStatus", + "documentation":"

The status of the image version for the instance group. Indicates whether the instance group is running the latest image version or if an update is available.

" + }, "ActiveOperations":{ "shape":"ActiveOperations", "documentation":"

A map indicating active operations currently in progress for the instance group of a SageMaker HyperPod cluster. When there is a scaling operation in progress, this map contains a key Scaling with value 1.

" @@ -9962,6 +10884,10 @@ "shape":"ImageId", "documentation":"

The ID of the Amazon Machine Image (AMI) desired for the node.

" }, + "ImageVersionStatus":{ + "shape":"ClusterImageVersionStatus", + "documentation":"

The status of the image version for the cluster node.

" + }, "UltraServerInfo":{ "shape":"UltraServerInfo", "documentation":"

Contains information about the UltraServer.

" @@ -10065,6 +10991,10 @@ "PrivateDnsHostname":{ "shape":"ClusterPrivateDnsHostname", "documentation":"

The private DNS hostname of the SageMaker HyperPod cluster node.

" + }, + "ImageVersionStatus":{ + "shape":"ClusterImageVersionStatus", + "documentation":"

The status of the image version for the cluster node.

" } }, "documentation":"

Lists a summary of the properties of an instance (also called a node interchangeably) of a SageMaker HyperPod cluster.

" @@ -10201,8 +11131,7 @@ "InstanceCount", "InstanceGroupName", "InstanceType", - "ExecutionRole", - "EnvironmentConfig" + "ExecutionRole" ], "members":{ "InstanceCount":{ @@ -11256,6 +12185,151 @@ "min":2, "pattern":"[A-Z]{2}" }, + "CreateAIBenchmarkJobRequest":{ + "type":"structure", + "required":[ + "AIBenchmarkJobName", + "BenchmarkTarget", + "OutputConfig", + "AIWorkloadConfigIdentifier", + "RoleArn" + ], + "members":{ + "AIBenchmarkJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI benchmark job. The name must be unique within your Amazon Web Services account in the current Amazon Web Services Region.

" + }, + "BenchmarkTarget":{ + "shape":"AIBenchmarkTarget", + "documentation":"

The target endpoint to benchmark. Specify a SageMaker endpoint by providing its name or Amazon Resource Name (ARN).

" + }, + "OutputConfig":{ + "shape":"AIBenchmarkOutputConfig", + "documentation":"

The output configuration for the benchmark job, including the Amazon S3 location where benchmark results are stored.

" + }, + "AIWorkloadConfigIdentifier":{ + "shape":"AIResourceIdentifier", + "documentation":"

The name or Amazon Resource Name (ARN) of the AI workload configuration to use for this benchmark job.

" + }, + "RoleArn":{ + "shape":"RoleArn", + "documentation":"

The Amazon Resource Name (ARN) of an IAM role that enables Amazon SageMaker AI to perform tasks on your behalf.

" + }, + "NetworkConfig":{ + "shape":"AIBenchmarkNetworkConfig", + "documentation":"

The network configuration for the benchmark job, including VPC settings.

" + }, + "Tags":{ + "shape":"TagList", + "documentation":"

The metadata that you apply to Amazon Web Services resources to help you categorize and organize them. Each tag consists of a key and a value, both of which you define.

" + } + } + }, + "CreateAIBenchmarkJobResponse":{ + "type":"structure", + "required":["AIBenchmarkJobArn"], + "members":{ + "AIBenchmarkJobArn":{ + "shape":"AIBenchmarkJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the created benchmark job.

" + } + } + }, + "CreateAIRecommendationJobRequest":{ + "type":"structure", + "required":[ + "AIRecommendationJobName", + "ModelSource", + "OutputConfig", + "AIWorkloadConfigIdentifier", + "PerformanceTarget", + "RoleArn" + ], + "members":{ + "AIRecommendationJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI recommendation job. The name must be unique within your Amazon Web Services account in the current Amazon Web Services Region.

" + }, + "ModelSource":{ + "shape":"AIModelSource", + "documentation":"

The source of the model to optimize. Specify the Amazon S3 location of the model artifacts.

" + }, + "OutputConfig":{ + "shape":"AIRecommendationOutputConfig", + "documentation":"

The output configuration for the recommendation job, including the Amazon S3 location for results and an optional model package group where the optimized model is registered.

" + }, + "AIWorkloadConfigIdentifier":{ + "shape":"AIResourceIdentifier", + "documentation":"

The name or Amazon Resource Name (ARN) of the AI workload configuration to use for this recommendation job.

" + }, + "PerformanceTarget":{ + "shape":"AIRecommendationPerformanceTarget", + "documentation":"

The performance targets for the recommendation job. Specify constraints on metrics such as time to first token (ttft-ms), throughput, or cost.

" + }, + "RoleArn":{ + "shape":"RoleArn", + "documentation":"

The Amazon Resource Name (ARN) of an IAM role that enables Amazon SageMaker AI to perform tasks on your behalf.

" + }, + "InferenceSpecification":{ + "shape":"AIRecommendationInferenceSpecification", + "documentation":"

The inference framework configuration. Specify the framework (LMI or VLLM) for the recommendation job.

" + }, + "OptimizeModel":{ + "shape":"AIRecommendationAllowOptimization", + "documentation":"

Whether to allow model optimization techniques such as quantization, speculative decoding, and kernel tuning. The default is true.

" + }, + "ComputeSpec":{ + "shape":"AIRecommendationComputeSpec", + "documentation":"

The compute resource specification for the recommendation job. You can specify up to 3 instance types to consider, and optionally provide a capacity reservation configuration.

" + }, + "Tags":{ + "shape":"TagList", + "documentation":"

The metadata that you apply to Amazon Web Services resources to help you categorize and organize them.

" + } + } + }, + "CreateAIRecommendationJobResponse":{ + "type":"structure", + "required":["AIRecommendationJobArn"], + "members":{ + "AIRecommendationJobArn":{ + "shape":"AIRecommendationJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the created recommendation job.

" + } + } + }, + "CreateAIWorkloadConfigRequest":{ + "type":"structure", + "required":["AIWorkloadConfigName"], + "members":{ + "AIWorkloadConfigName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI workload configuration. The name must be unique within your Amazon Web Services account in the current Amazon Web Services Region.

" + }, + "DatasetConfig":{ + "shape":"AIDatasetConfig", + "documentation":"

The dataset configuration for the workload. Specify input data channels with their data sources for benchmark workloads.

" + }, + "AIWorkloadConfigs":{ + "shape":"AIWorkloadConfigs", + "documentation":"

The benchmark tool configuration and workload specification. Provide the specification as an inline YAML or JSON string.

" + }, + "Tags":{ + "shape":"TagList", + "documentation":"

The metadata that you apply to Amazon Web Services resources to help you categorize and organize them. Each tag consists of a key and a value, both of which you define. For more information, see Tagging Amazon Web Services Resources in the Amazon Web Services General Reference.

" + } + } + }, + "CreateAIWorkloadConfigResponse":{ + "type":"structure", + "required":["AIWorkloadConfigArn"], + "members":{ + "AIWorkloadConfigArn":{ + "shape":"AIWorkloadConfigArn", + "documentation":"

The Amazon Resource Name (ARN) of the created AI workload configuration.

" + } + } + }, "CreateActionRequest":{ "type":"structure", "required":[ @@ -12739,6 +13813,10 @@ "shape":"InferenceComponentSpecification", "documentation":"

Details about the resources to deploy with this inference component, including the model, container, and compute resources.

" }, + "Specifications":{ + "shape":"InferenceComponentSpecificationList", + "documentation":"

A list of specification objects for the inference component, one per instance type. Use this parameter when you want to deploy a different model or resource configuration for the inference component on each instance type. You can use either this parameter or the singular Specification parameter, but not both.
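A hedged sketch of using this new Specifications list with the existing create_inference_component call; instance types, model names, and memory figures are placeholders, and only one of Specification or Specifications may be set:

import boto3

sm = boto3.client("sagemaker")  # assumes a botocore model with this parameter

# Sketch: one specification per instance type for a heterogeneous deployment.
sm.create_inference_component(
    InferenceComponentName="my-inference-component",  # placeholder
    EndpointName="my-endpoint",                       # placeholder
    Specifications=[
        {
            "InstanceType": "ml.g5.2xlarge",
            "ModelName": "my-model-g5",
            "ComputeResourceRequirements": {"MinMemoryRequiredInMb": 1024},
        },
        {
            "InstanceType": "ml.g6e.2xlarge",
            "ModelName": "my-model-g6e",
            "ComputeResourceRequirements": {"MinMemoryRequiredInMb": 1024},
        },
    ],
    RuntimeConfig={"CopyCount": 1},
)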

" + }, "RuntimeConfig":{ "shape":"InferenceComponentRuntimeConfig", "documentation":"

Runtime settings for a model that is deployed with an inference component.

" @@ -13303,6 +14381,10 @@ "Tags":{ "shape":"TagList", "documentation":"

A list of key value pairs associated with the model group. For more information, see Tagging Amazon Web Services resources in the Amazon Web Services General Reference Guide.

" + }, + "ManagedConfiguration":{ + "shape":"ManagedConfiguration", + "documentation":"

The managed configuration of the model package group.

" } } }, @@ -13413,6 +14495,10 @@ "ModelLifeCycle":{ "shape":"ModelLifeCycle", "documentation":"

A structure describing the current state of the model in its life cycle.

" + }, + "ManagedStorageType":{ + "shape":"ManagedStorageType", + "documentation":"

The storage type of the model package.

" } } }, @@ -15239,6 +16325,63 @@ "max":65535, "min":0 }, + "DeleteAIBenchmarkJobRequest":{ + "type":"structure", + "required":["AIBenchmarkJobName"], + "members":{ + "AIBenchmarkJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI benchmark job to delete.

" + } + } + }, + "DeleteAIBenchmarkJobResponse":{ + "type":"structure", + "members":{ + "AIBenchmarkJobArn":{ + "shape":"AIBenchmarkJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the deleted benchmark job.

" + } + } + }, + "DeleteAIRecommendationJobRequest":{ + "type":"structure", + "required":["AIRecommendationJobName"], + "members":{ + "AIRecommendationJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI recommendation job to delete.

" + } + } + }, + "DeleteAIRecommendationJobResponse":{ + "type":"structure", + "members":{ + "AIRecommendationJobArn":{ + "shape":"AIRecommendationJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the deleted recommendation job.

" + } + } + }, + "DeleteAIWorkloadConfigRequest":{ + "type":"structure", + "required":["AIWorkloadConfigName"], + "members":{ + "AIWorkloadConfigName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI workload configuration to delete.

" + } + } + }, + "DeleteAIWorkloadConfigResponse":{ + "type":"structure", + "members":{ + "AIWorkloadConfigArn":{ + "shape":"AIWorkloadConfigArn", + "documentation":"

The Amazon Resource Name (ARN) of the deleted AI workload configuration.

" + } + } + }, "DeleteActionRequest":{ "type":"structure", "required":["ActionName"], @@ -16263,6 +17406,220 @@ }, "documentation":"

Information that SageMaker Neo automatically derived about the model.

" }, + "DescribeAIBenchmarkJobRequest":{ + "type":"structure", + "required":["AIBenchmarkJobName"], + "members":{ + "AIBenchmarkJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI benchmark job to describe.

" + } + } + }, + "DescribeAIBenchmarkJobResponse":{ + "type":"structure", + "required":[ + "AIBenchmarkJobName", + "AIBenchmarkJobArn", + "AIBenchmarkJobStatus", + "BenchmarkTarget", + "OutputConfig", + "AIWorkloadConfigIdentifier", + "RoleArn", + "CreationTime" + ], + "members":{ + "AIBenchmarkJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI benchmark job.

" + }, + "AIBenchmarkJobArn":{ + "shape":"AIBenchmarkJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the AI benchmark job.

" + }, + "AIBenchmarkJobStatus":{ + "shape":"AIBenchmarkJobStatus", + "documentation":"

The status of the AI benchmark job.

" + }, + "FailureReason":{ + "shape":"FailureReason", + "documentation":"

If the benchmark job failed, the reason it failed.

" + }, + "BenchmarkTarget":{ + "shape":"AIBenchmarkTarget", + "documentation":"

The target endpoint that was benchmarked.

" + }, + "OutputConfig":{ + "shape":"AIBenchmarkOutputResult", + "documentation":"

The output configuration for the benchmark job, including the Amazon S3 output location and CloudWatch log information.

" + }, + "AIWorkloadConfigIdentifier":{ + "shape":"AIResourceIdentifier", + "documentation":"

The name or Amazon Resource Name (ARN) of the AI workload configuration used for this benchmark job.

" + }, + "RoleArn":{ + "shape":"RoleArn", + "documentation":"

The Amazon Resource Name (ARN) of the IAM role used by the benchmark job.

" + }, + "NetworkConfig":{ + "shape":"AIBenchmarkNetworkConfig", + "documentation":"

The network configuration for the benchmark job.

" + }, + "CreationTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the benchmark job was created.

" + }, + "StartTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the benchmark job started running.

" + }, + "EndTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the benchmark job completed.

" + }, + "Tags":{ + "shape":"TagList", + "documentation":"

The tags associated with the benchmark job.

" + } + } + }, + "DescribeAIRecommendationJobRequest":{ + "type":"structure", + "required":["AIRecommendationJobName"], + "members":{ + "AIRecommendationJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI recommendation job to describe.

" + } + } + }, + "DescribeAIRecommendationJobResponse":{ + "type":"structure", + "required":[ + "AIRecommendationJobName", + "AIRecommendationJobArn", + "AIRecommendationJobStatus", + "ModelSource", + "OutputConfig", + "AIWorkloadConfigIdentifier", + "RoleArn", + "CreationTime" + ], + "members":{ + "AIRecommendationJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI recommendation job.

" + }, + "AIRecommendationJobArn":{ + "shape":"AIRecommendationJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the AI recommendation job.

" + }, + "AIRecommendationJobStatus":{ + "shape":"AIRecommendationJobStatus", + "documentation":"

The status of the AI recommendation job.

" + }, + "FailureReason":{ + "shape":"FailureReason", + "documentation":"

If the recommendation job failed, the reason it failed.

" + }, + "ModelSource":{ + "shape":"AIModelSource", + "documentation":"

The source of the model that was analyzed.

" + }, + "OutputConfig":{ + "shape":"AIRecommendationOutputResult", + "documentation":"

The output configuration for the recommendation job.

" + }, + "InferenceSpecification":{ + "shape":"AIRecommendationInferenceSpecification", + "documentation":"

The inference framework configuration.

" + }, + "AIWorkloadConfigIdentifier":{ + "shape":"AIResourceIdentifier", + "documentation":"

The name or Amazon Resource Name (ARN) of the AI workload configuration used for this recommendation job.

" + }, + "OptimizeModel":{ + "shape":"AIRecommendationAllowOptimization", + "documentation":"

Whether model optimization techniques were allowed.

" + }, + "PerformanceTarget":{ + "shape":"AIRecommendationPerformanceTarget", + "documentation":"

The performance targets specified for the recommendation job.

" + }, + "Recommendations":{ + "shape":"AIRecommendationList", + "documentation":"

The list of optimization recommendations generated by the job. Each recommendation includes optimization details, deployment configuration, expected performance metrics, and the associated benchmark job ARN.

" + }, + "RoleArn":{ + "shape":"RoleArn", + "documentation":"

The Amazon Resource Name (ARN) of the IAM role used by the recommendation job.

" + }, + "ComputeSpec":{ + "shape":"AIRecommendationComputeSpec", + "documentation":"

The compute resource specification for the recommendation job.

" + }, + "CreationTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the recommendation job was created.

" + }, + "StartTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the recommendation job started running.

" + }, + "EndTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the recommendation job completed.

" + }, + "Tags":{ + "shape":"TagList", + "documentation":"

The tags associated with the recommendation job.

" + } + } + }, + "DescribeAIWorkloadConfigRequest":{ + "type":"structure", + "required":["AIWorkloadConfigName"], + "members":{ + "AIWorkloadConfigName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI workload configuration to describe.

" + } + } + }, + "DescribeAIWorkloadConfigResponse":{ + "type":"structure", + "required":[ + "AIWorkloadConfigName", + "AIWorkloadConfigArn", + "CreationTime" + ], + "members":{ + "AIWorkloadConfigName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI workload configuration.

" + }, + "AIWorkloadConfigArn":{ + "shape":"AIWorkloadConfigArn", + "documentation":"

The Amazon Resource Name (ARN) of the AI workload configuration.

" + }, + "DatasetConfig":{ + "shape":"AIDatasetConfig", + "documentation":"

The dataset configuration for the workload.

" + }, + "AIWorkloadConfigs":{ + "shape":"AIWorkloadConfigs", + "documentation":"

The benchmark tool configuration and workload specification.

" + }, + "Tags":{ + "shape":"TagList", + "documentation":"

The tags associated with the AI workload configuration.

" + }, + "CreationTime":{ + "shape":"Timestamp", + "documentation":"

A timestamp that indicates when the AI workload configuration was created.

" + } + } + }, "DescribeActionRequest":{ "type":"structure", "required":["ActionName"], @@ -18669,6 +20026,10 @@ "shape":"InferenceComponentSpecificationSummary", "documentation":"

Details about the resources that are deployed with this inference component.

" }, + "Specifications":{ + "shape":"InferenceComponentSpecificationSummaryList", + "documentation":"

A list of specification summaries for the inference component, one per instance type. This parameter is populated when the inference component was created with multiple specifications; in that case, the singular Specification parameter is not returned.

" + }, "RuntimeConfig":{ "shape":"InferenceComponentRuntimeConfigSummary", "documentation":"

Details about the runtime settings for the model that is deployed with the inference component.

" @@ -19507,6 +20868,10 @@ "ModelPackageGroupStatus":{ "shape":"ModelPackageGroupStatus", "documentation":"

The status of the model group.

" + }, + "ManagedConfiguration":{ + "shape":"ManagedConfiguration", + "documentation":"

The managed configuration of the model package group.

" } } }, @@ -19645,6 +21010,10 @@ "ModelLifeCycle":{ "shape":"ModelLifeCycle", "documentation":"

A structure describing the current state of the model in its life cycle.

" + }, + "ManagedStorageType":{ + "shape":"ManagedStorageType", + "documentation":"

The storage type of the model package.

" } } }, @@ -20978,7 +22347,7 @@ }, "TargetResources":{ "shape":"SageMakerResourceNames", - "documentation":"

The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints) that can use this training plan.

Training plans are specific to their target resource.

" + "documentation":"

The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints, Studio apps) that can use this training plan.

Training plans are specific to their target resource.

" }, "ReservedCapacitySummaries":{ "shape":"ReservedCapacitySummaries", @@ -23200,6 +24569,13 @@ "DISABLED" ] }, + "ExecutionRoleSessionNameMode":{ + "type":"string", + "enum":[ + "STATIC", + "USER_IDENTITY" + ] + }, "ExecutionStatus":{ "type":"string", "enum":[ @@ -23218,6 +24594,10 @@ "min":0, "pattern":"[\\S\\s]*" }, + "ExpectedPerformanceList":{ + "type":"list", + "member":{"shape":"AIRecommendationPerformanceMetric"} + }, "Experiment":{ "type":"structure", "members":{ @@ -26289,6 +27669,29 @@ "min":0, "pattern":"[a-zA-Z0-9-]+" }, + "InferenceComponentPlacementStatus":{ + "type":"structure", + "required":[ + "InstanceType", + "CurrentCopyCount" + ], + "members":{ + "InstanceType":{ + "shape":"ProductionVariantInstanceType", + "documentation":"

The ML compute instance type where the inference component copies are placed.

" + }, + "CurrentCopyCount":{ + "shape":"InferenceComponentCopyCount", + "documentation":"

The number of inference component copies currently placed on instances of this type.

" + } + }, + "documentation":"

The placement status of an inference component on a specific instance type. Shows the number of inference component copies currently placed on instances of a given type.

" + }, + "InferenceComponentPlacementStatusList":{ + "type":"list", + "member":{"shape":"InferenceComponentPlacementStatus"}, + "min":1 + }, "InferenceComponentPlacementStrategy":{ "type":"string", "enum":[ @@ -26343,6 +27746,10 @@ "CurrentCopyCount":{ "shape":"InferenceComponentCopyCount", "documentation":"

The number of runtime copies of the model container that are currently deployed.

" + }, + "PlacementStatus":{ + "shape":"InferenceComponentPlacementStatusList", + "documentation":"

The placement status of the inference component, showing how its copies are distributed across instance types.

" } }, "documentation":"

Details about the runtime settings for the model that is deployed with the inference component.

" @@ -26373,6 +27780,10 @@ "InferenceComponentSpecification":{ "type":"structure", "members":{ + "InstanceType":{ + "shape":"ProductionVariantInstanceType", + "documentation":"

The ML compute instance type that this specification applies to. Required when the Specifications parameter contains multiple entries.

" + }, "ModelName":{ "shape":"ModelName", "documentation":"

The name of an existing SageMaker AI model object in your account that you want to deploy with the inference component.

" @@ -26404,9 +27815,19 @@ }, "documentation":"

Details about the resources to deploy with this inference component, including the model, container, and compute resources.

" }, + "InferenceComponentSpecificationList":{ + "type":"list", + "member":{"shape":"InferenceComponentSpecification"}, + "max":5, + "min":1 + }, "InferenceComponentSpecificationSummary":{ "type":"structure", "members":{ + "InstanceType":{ + "shape":"ProductionVariantInstanceType", + "documentation":"

The ML compute instance type associated with this inference component specification.

" + }, "ModelName":{ "shape":"ModelName", "documentation":"

The name of the SageMaker AI model object that is deployed with the inference component.

" @@ -26438,6 +27859,11 @@ }, "documentation":"

Details about the resources that are deployed with this inference component.

" }, + "InferenceComponentSpecificationSummaryList":{ + "type":"list", + "member":{"shape":"InferenceComponentSpecificationSummary"}, + "min":1 + }, "InferenceComponentStartupParameters":{ "type":"structure", "members":{ @@ -27103,6 +28529,10 @@ "shape":"AdditionalEnis", "documentation":"

Information about additional Elastic Network Interfaces (ENIs) associated with the instance.

" }, + "InstanceRequirementsEniConfigurations":{ + "shape":"InstanceRequirementsEniConfigurations", + "documentation":"

The ENI configurations for the instance types in the instance requirements, grouped by network interface category (for example, ENI-only or EFA with ENIs). You can specify at most one configuration per category.

" + }, "CapacityReservation":{ "shape":"CapacityReservation", "documentation":"

Information about the Capacity Reservation used by the instance.

" @@ -27148,6 +28578,81 @@ }, "documentation":"

Configuration for how instances are placed and allocated within UltraServers. This is only applicable for UltraServer capacity.

" }, + "InstancePool":{ + "type":"structure", + "required":[ + "InstanceType", + "Priority" + ], + "members":{ + "InstanceType":{ + "shape":"ProductionVariantInstanceType", + "documentation":"

The ML compute instance type for the instance pool.

" + }, + "ModelNameOverride":{ + "shape":"ModelName", + "documentation":"

The name of a SageMaker model to use for this instance pool instead of the model specified for the production variant. Use this to deploy a different model optimized for the instance type in this pool.

" + }, + "Priority":{ + "shape":"InstancePoolPriority", + "documentation":"

The priority for the instance pool. SageMaker attempts to provision instances in order of priority, starting with the lowest value. If instances for a higher-priority pool are unavailable, SageMaker attempts to provision from the next pool.

Valid values: 1 to 5, where 1 is the highest priority.

" + } + }, + "documentation":"

Specifies an instance type and its priority for a heterogeneous endpoint. Use instance pools to configure a production variant with multiple instance types, enabling the endpoint to provision instances across different types based on priority.
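A sketch of an instance pool list built from the shape definitions above; the production variant field that accepts this list is not shown in this diff, so only the pool entries themselves are illustrated:

# Two pools: SageMaker tries ml.g5.2xlarge first (Priority 1 is highest),
# then falls back to ml.g6.2xlarge, which carries a per-pool model override.
instance_pools = [
    {"InstanceType": "ml.g5.2xlarge", "Priority": 1},
    {
        "InstanceType": "ml.g6.2xlarge",
        "ModelNameOverride": "my-model-g6",  # optional override of the variant's model
        "Priority": 2,
    },
]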

" + }, + "InstancePoolList":{ + "type":"list", + "member":{"shape":"InstancePool"}, + "max":5, + "min":1 + }, + "InstancePoolPriority":{ + "type":"integer", + "box":true, + "max":5, + "min":1 + }, + "InstancePoolSummary":{ + "type":"structure", + "required":[ + "InstanceType", + "CurrentInstanceCount" + ], + "members":{ + "InstanceType":{ + "shape":"ProductionVariantInstanceType", + "documentation":"

The ML compute instance type for the instance pool.

" + }, + "CurrentInstanceCount":{ + "shape":"TaskCount", + "documentation":"

The current number of instances of this type in the instance pool.

" + } + }, + "documentation":"

A summary of an instance pool for a production variant, including the instance type and the current number of instances.

" + }, + "InstancePoolSummaryList":{ + "type":"list", + "member":{"shape":"InstancePoolSummary"}, + "min":1 + }, + "InstanceRequirementsEniConfiguration":{ + "type":"structure", + "members":{ + "CustomerEni":{ + "shape":"String", + "documentation":"

The ID of the customer-managed Elastic Network Interface (ENI) associated with the instance type category.

" + }, + "AdditionalEnis":{ + "shape":"AdditionalEnis", + "documentation":"

Information about additional Elastic Network Interfaces (ENIs) associated with the instance type category.

" + } + }, + "documentation":"

The customer ENI and additional ENIs associated with a network interface category.

" + }, + "InstanceRequirementsEniConfigurations":{ + "type":"list", + "member":{"shape":"InstanceRequirementsEniConfiguration"} + }, "InstanceType":{ "type":"string", "enum":[ @@ -28082,6 +29587,178 @@ "Action" ] }, + "ListAIBenchmarkJobsRequest":{ + "type":"structure", + "members":{ + "MaxResults":{ + "shape":"MaxResults", + "documentation":"

The maximum number of benchmark jobs to return in the response.

" + }, + "NextToken":{ + "shape":"NextToken", + "documentation":"

If the previous call to ListAIBenchmarkJobs didn't return the full set of jobs, the call returns a token for getting the next set.

" + }, + "NameContains":{ + "shape":"NameContains", + "documentation":"

A string in the job name. This filter returns only jobs whose name contains the specified string.

" + }, + "StatusEquals":{ + "shape":"AIBenchmarkJobStatus", + "documentation":"

A filter that returns only benchmark jobs with the specified status.

" + }, + "CreationTimeAfter":{ + "shape":"Timestamp", + "documentation":"

A filter that returns only jobs created after the specified time.

" + }, + "CreationTimeBefore":{ + "shape":"Timestamp", + "documentation":"

A filter that returns only jobs created before the specified time.

" + }, + "SortBy":{ + "shape":"ListAIBenchmarkJobsSortBy", + "documentation":"

The field to sort results by. The default is CreationTime.

" + }, + "SortOrder":{ + "shape":"SortOrder", + "documentation":"

The sort order for results. The default is Descending.

" + } + } + }, + "ListAIBenchmarkJobsResponse":{ + "type":"structure", + "required":["AIBenchmarkJobs"], + "members":{ + "AIBenchmarkJobs":{ + "shape":"AIBenchmarkJobSummaryList", + "documentation":"

An array of AIBenchmarkJobSummary objects, one for each benchmark job that matches the specified filters.

" + }, + "NextToken":{ + "shape":"NextToken", + "documentation":"

If the response is truncated, Amazon SageMaker AI returns this token. To retrieve the next set of jobs, use it in the subsequent request.
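A minimal pagination sketch against the request/response pair above (boto3; assumes a botocore release that includes ListAIBenchmarkJobs):

```
import boto3

sm = boto3.client("sagemaker")

jobs, token = [], None
while True:
    kwargs = {"MaxResults": 50}
    if token:
        kwargs["NextToken"] = token
    page = sm.list_ai_benchmark_jobs(**kwargs)
    jobs.extend(page["AIBenchmarkJobs"])
    token = page.get("NextToken")
    if not token:
        break  # no NextToken means the result set is complete
```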

" + } + } + }, + "ListAIBenchmarkJobsSortBy":{ + "type":"string", + "enum":[ + "Name", + "CreationTime", + "Status" + ] + }, + "ListAIRecommendationJobsRequest":{ + "type":"structure", + "members":{ + "MaxResults":{ + "shape":"MaxResults", + "documentation":"

The maximum number of recommendation jobs to return in the response.

" + }, + "NextToken":{ + "shape":"NextToken", + "documentation":"

If the previous call to ListAIRecommendationJobs didn't return the full set of jobs, the call returns a token for getting the next set.

" + }, + "NameContains":{ + "shape":"NameContains", + "documentation":"

A string in the job name. This filter returns only jobs whose name contains the specified string.

" + }, + "StatusEquals":{ + "shape":"AIRecommendationJobStatus", + "documentation":"

A filter that returns only recommendation jobs with the specified status.

" + }, + "CreationTimeAfter":{ + "shape":"Timestamp", + "documentation":"

A filter that returns only jobs created after the specified time.

" + }, + "CreationTimeBefore":{ + "shape":"Timestamp", + "documentation":"

A filter that returns only jobs created before the specified time.

" + }, + "SortBy":{ + "shape":"ListAIRecommendationJobsSortBy", + "documentation":"

The field to sort results by. The default is CreationTime.

" + }, + "SortOrder":{ + "shape":"SortOrder", + "documentation":"

The sort order for results. The default is Descending.

" + } + } + }, + "ListAIRecommendationJobsResponse":{ + "type":"structure", + "required":["AIRecommendationJobs"], + "members":{ + "AIRecommendationJobs":{ + "shape":"AIRecommendationJobSummaryList", + "documentation":"

An array of AIRecommendationJobSummary objects, one for each recommendation job that matches the specified filters.

" + }, + "NextToken":{ + "shape":"NextToken", + "documentation":"

If the response is truncated, Amazon SageMaker AI returns this token. To retrieve the next set of jobs, use it in the subsequent request.

" + } + } + }, + "ListAIRecommendationJobsSortBy":{ + "type":"string", + "enum":[ + "Name", + "CreationTime", + "Status" + ] + }, + "ListAIWorkloadConfigsRequest":{ + "type":"structure", + "members":{ + "MaxResults":{ + "shape":"MaxResults", + "documentation":"

The maximum number of AI workload configurations to return in the response.

" + }, + "NextToken":{ + "shape":"NextToken", + "documentation":"

If the previous call to ListAIWorkloadConfigs didn't return the full set of configurations, the call returns a token for getting the next set of configurations.

" + }, + "NameContains":{ + "shape":"NameContains", + "documentation":"

A string in the configuration name. This filter returns only configurations whose name contains the specified string.

" + }, + "CreationTimeAfter":{ + "shape":"Timestamp", + "documentation":"

A filter that returns only configurations created after the specified time.

" + }, + "CreationTimeBefore":{ + "shape":"Timestamp", + "documentation":"

A filter that returns only configurations created before the specified time.

" + }, + "SortBy":{ + "shape":"ListAIWorkloadConfigsSortBy", + "documentation":"

The field to sort results by. The default is CreationTime.

" + }, + "SortOrder":{ + "shape":"SortOrder", + "documentation":"

The sort order for results. The default is Descending.

" + } + } + }, + "ListAIWorkloadConfigsResponse":{ + "type":"structure", + "required":["AIWorkloadConfigs"], + "members":{ + "AIWorkloadConfigs":{ + "shape":"AIWorkloadConfigSummaryList", + "documentation":"

An array of AIWorkloadConfigSummary objects, one for each AI workload configuration that matches the specified filters.

" + }, + "NextToken":{ + "shape":"NextToken", + "documentation":"

If the response is truncated, Amazon SageMaker AI returns this token. To retrieve the next set of configurations, use it in the subsequent request.

" + } + } + }, + "ListAIWorkloadConfigsSortBy":{ + "type":"string", + "enum":[ + "Name", + "CreationTime" + ] + }, "ListActionsRequest":{ "type":"structure", "members":{ @@ -32562,6 +34239,16 @@ "min":0, "pattern":"\\d+\\.\\d+" }, + "ManagedConfiguration":{ + "type":"structure", + "members":{ + "ManagedStorageType":{ + "shape":"ManagedStorageType", + "documentation":"

The storage type of the model package.

" + } + }, + "documentation":"

The managed configuration of a model package group.

" + }, "ManagedInstanceScalingCooldownInMinutes":{ "type":"integer", "box":true, @@ -32598,6 +34285,10 @@ "DISABLED" ] }, + "ManagedStorageType":{ + "type":"string", + "enum":["Restricted"] + }, "MapString2048":{ "type":"map", "key":{"shape":"String2048"}, @@ -32897,11 +34588,11 @@ "members":{ "EnableEnhancedMetrics":{ "shape":"EnableEnhancedMetrics", - "documentation":"

Specifies whether to enable enhanced metrics for the endpoint. Enhanced metrics provide utilization data at instance and container granularity. Container granularity is supported for Inference Components. The default is False.

" + "documentation":"

Specifies whether to enable enhanced metrics for the endpoint. Enhanced metrics provide utilization and invocation data at instance and container granularity. Container granularity is supported for Inference Components. The default is False.

" }, "MetricPublishFrequencyInSeconds":{ "shape":"MetricPublishFrequencyInSeconds", - "documentation":"

The frequency, in seconds, at which utilization metrics are published to Amazon CloudWatch. The default is 60 seconds.

" + "documentation":"

The interval, in seconds, at which metrics are published to Amazon CloudWatch. Defaults to 60. Valid values: 10, 30, 60, 120, 180, 240, 300. When EnableEnhancedMetrics is set to False, this interval applies to utilization metrics only; invocation metrics continue to be published at the default 60-second interval. When EnableEnhancedMetrics is set to True, this interval applies to both utilization and invocation metrics.
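Illustratively, the two members above combine as follows (a sketch only; the name of the enclosing request field is not shown in this hunk):

```
# Publish utilization and invocation metrics every 30 seconds at instance
# and container granularity. 30 is one of the documented valid values
# (10, 30, 60, 120, 180, 240, 300).
metrics_config = {
    "EnableEnhancedMetrics": True,
    "MetricPublishFrequencyInSeconds": 30,
}
```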

" } }, "documentation":"

The configuration for Utilization metrics.

" @@ -34271,6 +35962,10 @@ "shape":"String", "documentation":"

The name of a pre-trained machine learning model, benchmarked by Amazon SageMaker Inference Recommender, that matches your model. You can find a list of benchmarked models by calling ListModelMetadata.

" }, + "AdditionalModelDataSources":{ + "shape":"AdditionalModelDataSources", + "documentation":"

Data sources that are available to your model in addition to the one that you specify for ModelDataSource when you use the CreateModelPackage action.

" + }, "AdditionalS3DataSource":{ "shape":"AdditionalS3DataSource", "documentation":"

The additional data source that is used during inference in the Docker container for your model package.

" @@ -34386,6 +36081,10 @@ "ModelPackageGroupStatus":{ "shape":"ModelPackageGroupStatus", "documentation":"

The status of the model group.

" + }, + "ManagedConfiguration":{ + "shape":"ManagedConfiguration", + "documentation":"

The managed configuration of the model package group.

" } }, "documentation":"

Summary information about a model group.

" @@ -36107,12 +37806,12 @@ }, "DisableGlueTableCreation":{ "shape":"Boolean", - "documentation":"

Set to True to disable the automatic creation of an Amazon Web Services Glue table when configuring an OfflineStore. If set to False, Feature Store will name the OfflineStore Glue table following Athena's naming recommendations.

The default value is False.

", + "documentation":"

Set to True to disable the automatic creation of an Amazon Web Services Glue table when configuring an OfflineStore. If set to True and DataCatalogConfig is provided, Feature Store associates the provided catalog configuration with the feature group without creating a table. In this case, you are responsible for creating and managing the Glue table. If set to True without DataCatalogConfig, no Glue table is created or associated with the feature group. The Iceberg table format is only supported when this is set to False.

If set to False and DataCatalogConfig is provided, Feature Store creates the table using the specified names. If set to False without DataCatalogConfig, Feature Store auto-generates the table name following Athena's naming recommendations. This applies to both Glue and Apache Iceberg table formats.

The default value is False.

", "box":true }, "DataCatalogConfig":{ "shape":"DataCatalogConfig", - "documentation":"

The meta data of the Glue table that is autogenerated when an OfflineStore is created.

" + "documentation":"

The metadata of the Glue table for the OfflineStore. If not provided, Feature Store auto-generates the table name, database, and catalog when the OfflineStore is created. You can optionally provide this configuration to specify custom values. This applies to both Glue and Apache Iceberg table formats.
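A minimal sketch of the True-with-DataCatalogConfig case described above (an OfflineStoreConfig value for boto3's create_feature_group; bucket, table, and database names are placeholders):

```
# Associate an existing, self-managed Glue table with the feature group
# instead of letting Feature Store create one.
offline_store_config = {
    "S3StorageConfig": {"S3Uri": "s3://my-bucket/feature-store/"},
    "DisableGlueTableCreation": True,
    "DataCatalogConfig": {
        "TableName": "my_feature_table",
        "Catalog": "AwsDataCatalog",
        "Database": "my_database",
    },
}
```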

" }, "TableFormat":{ "shape":"TableFormat", @@ -36375,6 +38074,12 @@ "ml.p5.48xlarge", "ml.p5e.48xlarge", "ml.p5en.48xlarge", + "ml.g4dn.xlarge", + "ml.g4dn.2xlarge", + "ml.g4dn.4xlarge", + "ml.g4dn.8xlarge", + "ml.g4dn.12xlarge", + "ml.g4dn.16xlarge", "ml.g5.xlarge", "ml.g5.2xlarge", "ml.g5.4xlarge", @@ -37053,6 +38758,10 @@ "shape":"ProductionVariantInstanceType", "documentation":"

The type of instances associated with the variant.

" }, + "InstancePools":{ + "shape":"InstancePoolSummaryList", + "documentation":"

A list of instance pools for the production variant. Each pool indicates the instance type and the current number of instances of that type.

" + }, "AcceleratorType":{ "shape":"ProductionVariantAcceleratorType", "documentation":"

This parameter is no longer supported. Elastic Inference (EI) is no longer available.

This parameter was used to specify the size of the EI instance to use for the production variant.

" @@ -38369,6 +40078,14 @@ "shape":"ProductionVariantInstanceType", "documentation":"

The ML compute instance type.

" }, + "InstancePools":{ + "shape":"InstancePoolList", + "documentation":"

A list of instance pools for the production variant. Each instance pool specifies an instance type and its priority for provisioning. Use instance pools to configure heterogeneous endpoints that deploy models across multiple instance types.

" + }, + "VariantInstanceProvisionTimeoutInSeconds":{ + "shape":"VariantInstanceProvisionTimeoutInSeconds", + "documentation":"

The timeout value, in seconds, for provisioning instances for the production variant. When SageMaker encounters an insufficient capacity error while provisioning instances, it retries with the next instance pool (if configured) or waits until the timeout expires. This timeout applies only to capacity provisioning and does not include the time for model download or container startup.

Valid values: 300 to 3600.
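A minimal boto3 sketch combining InstancePools with this provisioning timeout (assumes a botocore release that includes both fields; names are placeholders, and the exact required-field combinations are not spelled out in this hunk):

```
import boto3

sm = boto3.client("sagemaker")
sm.create_endpoint_config(
    EndpointConfigName="heterogeneous-config",
    ProductionVariants=[
        {
            "VariantName": "AllTraffic",
            "ModelName": "my-default-model",
            "InitialInstanceCount": 2,
            # Provision ml.g5.xlarge first; fall back to ml.g4dn.xlarge.
            "InstancePools": [
                {"InstanceType": "ml.g5.xlarge", "Priority": 1},
                {"InstanceType": "ml.g4dn.xlarge", "Priority": 2},
            ],
            # Stop retrying capacity provisioning after 10 minutes.
            "VariantInstanceProvisionTimeoutInSeconds": 600,
        }
    ],
)
```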

" + }, "InitialVariantWeight":{ "shape":"VariantWeight", "documentation":"

Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. The traffic to a production variant is determined by the ratio of the VariantWeight to the sum of all VariantWeight values across all ProductionVariants. If unspecified, it defaults to 1.0.

" @@ -38939,6 +40656,10 @@ "shape":"TaskCount", "documentation":"

The number of instances requested in the UpdateEndpointWeightsAndCapacities request.

" }, + "InstancePools":{ + "shape":"InstancePoolSummaryList", + "documentation":"

A list of instance pools for the production variant. Each pool indicates the instance type and the current number of instances of that type.

" + }, "VariantStatus":{ "shape":"ProductionVariantStatusList", "documentation":"

The endpoint variant status which describes the current deployment stage status or operational status.

" @@ -39627,7 +41348,7 @@ ], "members":{ "InstanceType":{ - "shape":"InstanceType", + "shape":"ProductionVariantInstanceType", "documentation":"

The instance type the model is deployed to.

" }, "InstanceCount":{ @@ -40750,6 +42471,10 @@ "LifecycleConfigArn":{ "shape":"StudioLifecycleConfigArn", "documentation":"

The Amazon Resource Name (ARN) of the Lifecycle Configuration attached to the Resource.

" + }, + "TrainingPlanArn":{ + "shape":"StudioResourceSpecTrainingPlanArn", + "documentation":"

The ARN of the SageMaker AI Training Plan to use for this app. When you specify a training plan, the app launches on reserved GPU capacity. This field is supported for JupyterLab and CodeEditor app types.

For more information about how to reserve GPU capacity with SageMaker AI Training Plans, see Using training plans in Studio applications.
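A minimal boto3 sketch of launching a Studio app on reserved capacity (assumes a botocore release with this field; all ARNs and names are placeholders):

```
import boto3

sm = boto3.client("sagemaker")
sm.create_app(
    DomainId="d-exampledomain",
    UserProfileName="my-user",
    AppType="JupyterLab",  # supported for JupyterLab and CodeEditor
    AppName="default",
    ResourceSpec={
        "InstanceType": "ml.g5.xlarge",
        "TrainingPlanArn": "arn:aws:sagemaker:us-west-2:111122223333:training-plan/my-plan",
        # Pass the literal string "None" to detach a previously attached plan
        # (see StudioResourceSpecTrainingPlanArn later in this model).
    },
)
```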

" } }, "documentation":"

Specifies the ARNs of a SageMaker AI image and SageMaker AI image version, and the instance type that the version runs on.

When both SageMakerImageVersionArn and SageMakerImageArn are passed, SageMakerImageVersionArn is used. Any updates to SageMakerImageArn will not take effect if SageMakerImageVersionArn already exists in the ResourceSpec because SageMakerImageVersionArn always takes precedence. To clear the value set for SageMakerImageVersionArn, pass None as the value.

" @@ -41161,7 +42886,8 @@ "enum":[ "training-job", "hyperpod-cluster", - "endpoint" + "endpoint", + "studio-apps" ] }, "SageMakerResourceNames":{ @@ -41509,7 +43235,7 @@ }, "TargetResources":{ "shape":"SageMakerResourceNames", - "documentation":"

The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints) to search for in the offerings.

Training plans are specific to their target resource.

" + "documentation":"

The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints, Studio apps) to search for in the offerings.

Training plans are specific to their target resource.

" }, "TrainingPlanArn":{ "shape":"String", @@ -42655,6 +44381,46 @@ "Succeeded" ] }, + "StopAIBenchmarkJobRequest":{ + "type":"structure", + "required":["AIBenchmarkJobName"], + "members":{ + "AIBenchmarkJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI benchmark job to stop.

" + } + } + }, + "StopAIBenchmarkJobResponse":{ + "type":"structure", + "required":["AIBenchmarkJobArn"], + "members":{ + "AIBenchmarkJobArn":{ + "shape":"AIBenchmarkJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the stopped benchmark job.

" + } + } + }, + "StopAIRecommendationJobRequest":{ + "type":"structure", + "required":["AIRecommendationJobName"], + "members":{ + "AIRecommendationJobName":{ + "shape":"AIEntityName", + "documentation":"

The name of the AI recommendation job to stop.

" + } + } + }, + "StopAIRecommendationJobResponse":{ + "type":"structure", + "required":["AIRecommendationJobArn"], + "members":{ + "AIRecommendationJobArn":{ + "shape":"AIRecommendationJobArn", + "documentation":"

The Amazon Resource Name (ARN) of the stopped recommendation job.

" + } + } + }, "StopAutoMLJobRequest":{ "type":"structure", "required":["AutoMLJobName"], @@ -43022,6 +44788,13 @@ "type":"list", "member":{"shape":"StudioLifecycleConfigDetails"} }, + "StudioResourceSpecTrainingPlanArn":{ + "type":"string", + "documentation":"

A TrainingPlanArn variant for ResourceSpec that allows "None" to detach a training plan. Based on TrainingPlanArn (min:50, max:2048), but with min:0 and "None" added to the pattern.

", + "max":2048, + "min":0, + "pattern":"(arn:aws[a-z\\-]*:sagemaker:[a-z0-9\\-]*:[0-9]{12}:training-plan/.*|None)" + }, "StudioWebPortal":{ "type":"string", "enum":[ @@ -43047,6 +44820,10 @@ "HiddenSageMakerImageVersionAliases":{ "shape":"HiddenSageMakerImageVersionAliasesList", "documentation":"

The version aliases you are hiding from the Studio user interface.

" + }, + "ExecutionRoleSessionNameMode":{ + "shape":"ExecutionRoleSessionNameMode", + "documentation":"

The execution role session name mode. If this value is set to USER_IDENTITY, the session name of the execution role corresponds to the user's identity. For IAM domains, the session name is the IAM session name used to generate the presigned URL. For IAM Identity Center domains, the session name is the username of the associated IAM Identity Center user. If this value is set to STATIC or is not set, the session name defaults to SageMaker.
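For example, the mode above is set through StudioWebPortalSettings inside UserSettings (a sketch; the value applies wherever these Studio settings are accepted):

```
# Make execution-role session names reflect the user's identity instead of
# the default "SageMaker" session name.
user_settings = {
    "StudioWebPortalSettings": {
        "ExecutionRoleSessionNameMode": "USER_IDENTITY",
    }
}
```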

" } }, "documentation":"

Studio settings. If these settings are applied on a user level, they take priority over the settings applied on a domain level.

" @@ -44180,6 +45957,10 @@ "shape":"ResourceConfig", "documentation":"

Resources, including ML compute instances and ML storage volumes, that are configured for model training.

" }, + "WarmPoolStatus":{ + "shape":"WarmPoolStatus", + "documentation":"

The status of the warm pool associated with the training job.

" + }, "VpcConfig":{ "shape":"VpcConfig", "documentation":"

A VpcConfig object that specifies the VPC that this training job has access to. For more information, see Protect Training Jobs by Using an Amazon Virtual Private Cloud.

" @@ -44619,7 +46400,7 @@ }, "TargetResources":{ "shape":"SageMakerResourceNames", - "documentation":"

The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints) for this training plan offering.

Training plans are specific to their target resource.

" + "documentation":"

The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints, Studio apps) for this training plan offering.

Training plans are specific to their target resource.

" }, "RequestedStartTimeAfter":{ "shape":"Timestamp", @@ -44763,7 +46544,7 @@ }, "TargetResources":{ "shape":"SageMakerResourceNames", - "documentation":"

The target resources (e.g., training jobs, HyperPod clusters, Endpoints) that can use this training plan.

Training plans are specific to their target resource.

" + "documentation":"

The target resources (e.g., training jobs, HyperPod clusters, Endpoints, Studio apps) that can use this training plan.

Training plans are specific to their target resource.

" }, "ReservedCapacitySummaries":{ "shape":"ReservedCapacitySummaries", @@ -46942,6 +48723,10 @@ "shape":"InferenceComponentSpecification", "documentation":"

Details about the resources to deploy with this inference component, including the model, container, and compute resources.

" }, + "Specifications":{ + "shape":"InferenceComponentSpecificationList", + "documentation":"

A list of specification objects for the inference component, one per instance type. Use this parameter when you want to specify different model or resource configurations for the inference component on each instance type. You can use either this parameter or the singular Specification parameter, but not both.
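A minimal boto3 sketch of the list form (assumes a botocore release with the Specifications parameter above; names and sizing are placeholders):

```
import boto3

sm = boto3.client("sagemaker")
sm.update_inference_component(
    InferenceComponentName="my-inference-component",
    # One specification per instance type; mutually exclusive with the
    # singular Specification parameter.
    Specifications=[
        {
            "InstanceType": "ml.g5.xlarge",
            "ModelName": "my-g5-model",
            "ComputeResourceRequirements": {
                "NumberOfAcceleratorDevicesRequired": 1,
                "MinMemoryRequiredInMb": 4096,
            },
        },
        {
            "InstanceType": "ml.g4dn.xlarge",
            "ModelName": "my-g4dn-model",
            "ComputeResourceRequirements": {
                "NumberOfAcceleratorDevicesRequired": 1,
                "MinMemoryRequiredInMb": 4096,
            },
        },
    ],
)
```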

" + }, "RuntimeConfig":{ "shape":"InferenceComponentRuntimeConfig", "documentation":"

Runtime settings for a model that is deployed with an inference component.

" @@ -48049,6 +49834,12 @@ "max":1, "min":0 }, + "VariantInstanceProvisionTimeoutInSeconds":{ + "type":"integer", + "box":true, + "max":3600, + "min":300 + }, "VariantName":{ "type":"string", "max":63, @@ -48494,6 +50285,17 @@ "type":"list", "member":{"shape":"Workforce"} }, + "WorkloadSpec":{ + "type":"structure", + "members":{ + "Inline":{ + "shape":"String", + "documentation":"

An inline YAML or JSON string that defines benchmark parameters.

" + } + }, + "documentation":"

The workload specification for benchmark tool configuration. Provide an inline YAML or JSON string.
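A minimal sketch of supplying an inline spec when creating a workload configuration (boto3; the YAML keys are hypothetical, since the spec schema is not part of this service model, and the channel layout follows the AIDatasetConfig shapes in this patch):

```
import boto3

sm = boto3.client("sagemaker")
sm.create_ai_workload_config(
    AIWorkloadConfigName="my-workload-config",
    DatasetConfig={
        "InputDataConfig": [
            {
                "ChannelName": "prompts",
                "DataSource": {"S3DataSource": {"S3Uri": "s3://my-bucket/dataset/"}},
            }
        ]
    },
    AIWorkloadConfigs={
        "WorkloadSpec": {
            # Hypothetical benchmark parameters; accepted keys are not
            # defined in this service model.
            "Inline": "concurrency: 8\nduration_seconds: 300\n",
        }
    },
)
```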

", + "union":true + }, "WorkspaceSettings":{ "type":"structure", "members":{ diff --git a/src/sagemaker_core/main/code_injection/shape_dag.py b/src/sagemaker_core/main/code_injection/shape_dag.py index 2dcb233..a9c8b18 100644 --- a/src/sagemaker_core/main/code_injection/shape_dag.py +++ b/src/sagemaker_core/main/code_injection/shape_dag.py @@ -1,4 +1,339 @@ SHAPE_DAG = { + "AIBenchmarkEndpoint": { + "members": [ + {"name": "Identifier", "shape": "AIResourceIdentifier", "type": "string"}, + {"name": "TargetContainerHostname", "shape": "String", "type": "string"}, + { + "name": "InferenceComponents", + "shape": "AIBenchmarkInferenceComponentList", + "type": "list", + }, + ], + "type": "structure", + }, + "AIBenchmarkInferenceComponent": { + "members": [{"name": "Identifier", "shape": "AIResourceIdentifier", "type": "string"}], + "type": "structure", + }, + "AIBenchmarkInferenceComponentList": { + "member_shape": "AIBenchmarkInferenceComponent", + "member_type": "structure", + "type": "list", + }, + "AIBenchmarkJobSummary": { + "members": [ + {"name": "AIBenchmarkJobName", "shape": "AIEntityName", "type": "string"}, + {"name": "AIBenchmarkJobArn", "shape": "AIBenchmarkJobArn", "type": "string"}, + {"name": "AIBenchmarkJobStatus", "shape": "AIBenchmarkJobStatus", "type": "string"}, + {"name": "CreationTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "EndTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "AIWorkloadConfigName", "shape": "AIEntityName", "type": "string"}, + ], + "type": "structure", + }, + "AIBenchmarkJobSummaryList": { + "member_shape": "AIBenchmarkJobSummary", + "member_type": "structure", + "type": "list", + }, + "AIBenchmarkNetworkConfig": { + "members": [{"name": "VpcConfig", "shape": "VpcConfig", "type": "structure"}], + "type": "structure", + }, + "AIBenchmarkOutputConfig": { + "members": [{"name": "S3OutputLocation", "shape": "S3Uri", "type": "string"}], + "type": "structure", + }, + "AIBenchmarkOutputResult": { + "members": [ + {"name": "S3OutputLocation", "shape": "S3Uri", "type": "string"}, + {"name": "CloudWatchLogs", "shape": "AICloudWatchLogsList", "type": "list"}, + ], + "type": "structure", + }, + "AIBenchmarkTarget": { + "members": [{"name": "Endpoint", "shape": "AIBenchmarkEndpoint", "type": "structure"}], + "type": "structure", + }, + "AICapacityReservationConfig": { + "members": [ + { + "name": "CapacityReservationPreference", + "shape": "AICapacityReservationPreference", + "type": "string", + }, + {"name": "MlReservationArns", "shape": "AIMlReservationArnList", "type": "list"}, + ], + "type": "structure", + }, + "AICloudWatchLogs": { + "members": [ + {"name": "LogGroupArn", "shape": "String", "type": "string"}, + {"name": "LogStreamName", "shape": "String", "type": "string"}, + ], + "type": "structure", + }, + "AICloudWatchLogsList": { + "member_shape": "AICloudWatchLogs", + "member_type": "structure", + "type": "list", + }, + "AIDatasetConfig": { + "members": [ + {"name": "InputDataConfig", "shape": "AIWorkloadInputDataConfigList", "type": "list"} + ], + "type": "structure", + }, + "AIMlReservationArnList": { + "member_shape": "AIMlReservationArn", + "member_type": "string", + "type": "list", + }, + "AIModelSource": { + "members": [{"name": "S3", "shape": "AIModelSourceS3", "type": "structure"}], + "type": "structure", + }, + "AIModelSourceS3": { + "members": [{"name": "S3Uri", "shape": "S3Uri", "type": "string"}], + "type": "structure", + }, + "AIRecommendation": { + "members": [ + {"name": "RecommendationDescription", "shape": 
"String", "type": "string"}, + { + "name": "OptimizationDetails", + "shape": "AIRecommendationOptimizationDetailList", + "type": "list", + }, + {"name": "ModelDetails", "shape": "AIRecommendationModelDetails", "type": "structure"}, + { + "name": "DeploymentConfiguration", + "shape": "AIRecommendationDeploymentConfiguration", + "type": "structure", + }, + {"name": "AIBenchmarkJobArn", "shape": "AIBenchmarkJobArn", "type": "string"}, + {"name": "ExpectedPerformance", "shape": "ExpectedPerformanceList", "type": "list"}, + ], + "type": "structure", + }, + "AIRecommendationComputeSpec": { + "members": [ + {"name": "InstanceTypes", "shape": "AIRecommendationInstanceTypeList", "type": "list"}, + { + "name": "CapacityReservationConfig", + "shape": "AICapacityReservationConfig", + "type": "structure", + }, + ], + "type": "structure", + }, + "AIRecommendationConstraint": { + "members": [{"name": "Metric", "shape": "AIRecommendationMetric", "type": "string"}], + "type": "structure", + }, + "AIRecommendationConstraintList": { + "member_shape": "AIRecommendationConstraint", + "member_type": "structure", + "type": "list", + }, + "AIRecommendationDeploymentConfiguration": { + "members": [ + {"name": "S3", "shape": "AIRecommendationDeploymentS3ChannelList", "type": "list"}, + {"name": "ImageUri", "shape": "String", "type": "string"}, + {"name": "InstanceType", "shape": "AIRecommendationInstanceType", "type": "string"}, + {"name": "InstanceCount", "shape": "AIRecommendationInstanceCount", "type": "integer"}, + { + "name": "CopyCountPerInstance", + "shape": "AIRecommendationCopyCountPerInstance", + "type": "integer", + }, + {"name": "EnvironmentVariables", "shape": "EnvironmentMap", "type": "map"}, + ], + "type": "structure", + }, + "AIRecommendationDeploymentS3Channel": { + "members": [ + {"name": "ChannelName", "shape": "AIChannelName", "type": "string"}, + {"name": "Uri", "shape": "S3Uri", "type": "string"}, + ], + "type": "structure", + }, + "AIRecommendationDeploymentS3ChannelList": { + "member_shape": "AIRecommendationDeploymentS3Channel", + "member_type": "structure", + "type": "list", + }, + "AIRecommendationInferenceSpecification": { + "members": [ + {"name": "Framework", "shape": "AIRecommendationInferenceFramework", "type": "string"} + ], + "type": "structure", + }, + "AIRecommendationInstanceDetail": { + "members": [ + {"name": "InstanceType", "shape": "AIRecommendationInstanceType", "type": "string"}, + {"name": "InstanceCount", "shape": "AIRecommendationInstanceCount", "type": "integer"}, + { + "name": "CopyCountPerInstance", + "shape": "AIRecommendationCopyCountPerInstance", + "type": "integer", + }, + ], + "type": "structure", + }, + "AIRecommendationInstanceDetailList": { + "member_shape": "AIRecommendationInstanceDetail", + "member_type": "structure", + "type": "list", + }, + "AIRecommendationInstanceTypeList": { + "member_shape": "AIRecommendationInstanceType", + "member_type": "string", + "type": "list", + }, + "AIRecommendationJobSummary": { + "members": [ + {"name": "AIRecommendationJobName", "shape": "AIEntityName", "type": "string"}, + {"name": "AIRecommendationJobArn", "shape": "AIRecommendationJobArn", "type": "string"}, + { + "name": "AIRecommendationJobStatus", + "shape": "AIRecommendationJobStatus", + "type": "string", + }, + {"name": "CreationTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "EndTime", "shape": "Timestamp", "type": "timestamp"}, + ], + "type": "structure", + }, + "AIRecommendationJobSummaryList": { + "member_shape": "AIRecommendationJobSummary", + 
"member_type": "structure", + "type": "list", + }, + "AIRecommendationList": { + "member_shape": "AIRecommendation", + "member_type": "structure", + "type": "list", + }, + "AIRecommendationModelDetails": { + "members": [ + {"name": "ModelPackageArn", "shape": "ModelPackageArn", "type": "string"}, + { + "name": "InferenceSpecificationName", + "shape": "AIInferenceSpecificationName", + "type": "string", + }, + { + "name": "InstanceDetails", + "shape": "AIRecommendationInstanceDetailList", + "type": "list", + }, + ], + "type": "structure", + }, + "AIRecommendationOptimizationConfigMap": { + "key_shape": "String", + "key_type": "string", + "type": "map", + "value_shape": "String", + "value_type": "string", + }, + "AIRecommendationOptimizationDetail": { + "members": [ + { + "name": "OptimizationType", + "shape": "AIRecommendationOptimizationType", + "type": "string", + }, + { + "name": "OptimizationConfig", + "shape": "AIRecommendationOptimizationConfigMap", + "type": "map", + }, + ], + "type": "structure", + }, + "AIRecommendationOptimizationDetailList": { + "member_shape": "AIRecommendationOptimizationDetail", + "member_type": "structure", + "type": "list", + }, + "AIRecommendationOutputConfig": { + "members": [ + {"name": "S3OutputLocation", "shape": "S3Uri", "type": "string"}, + { + "name": "ModelPackageGroupIdentifier", + "shape": "AIResourceIdentifier", + "type": "string", + }, + ], + "type": "structure", + }, + "AIRecommendationOutputResult": { + "members": [ + {"name": "S3OutputLocation", "shape": "S3Uri", "type": "string"}, + { + "name": "ModelPackageGroupIdentifier", + "shape": "AIResourceIdentifier", + "type": "string", + }, + ], + "type": "structure", + }, + "AIRecommendationPerformanceMetric": { + "members": [ + {"name": "Metric", "shape": "String", "type": "string"}, + {"name": "Stat", "shape": "String", "type": "string"}, + {"name": "Value", "shape": "String", "type": "string"}, + {"name": "Unit", "shape": "String", "type": "string"}, + ], + "type": "structure", + }, + "AIRecommendationPerformanceTarget": { + "members": [ + {"name": "Constraints", "shape": "AIRecommendationConstraintList", "type": "list"} + ], + "type": "structure", + }, + "AIWorkloadConfigSummary": { + "members": [ + {"name": "AIWorkloadConfigName", "shape": "AIEntityName", "type": "string"}, + {"name": "AIWorkloadConfigArn", "shape": "AIWorkloadConfigArn", "type": "string"}, + {"name": "CreationTime", "shape": "Timestamp", "type": "timestamp"}, + ], + "type": "structure", + }, + "AIWorkloadConfigSummaryList": { + "member_shape": "AIWorkloadConfigSummary", + "member_type": "structure", + "type": "list", + }, + "AIWorkloadConfigs": { + "members": [{"name": "WorkloadSpec", "shape": "WorkloadSpec", "type": "structure"}], + "type": "structure", + }, + "AIWorkloadDataSource": { + "members": [ + {"name": "S3DataSource", "shape": "AIWorkloadS3DataSource", "type": "structure"} + ], + "type": "structure", + }, + "AIWorkloadInputDataConfig": { + "members": [ + {"name": "ChannelName", "shape": "AIChannelName", "type": "string"}, + {"name": "DataSource", "shape": "AIWorkloadDataSource", "type": "structure"}, + ], + "type": "structure", + }, + "AIWorkloadInputDataConfigList": { + "member_shape": "AIWorkloadInputDataConfig", + "member_type": "structure", + "type": "list", + }, + "AIWorkloadS3DataSource": { + "members": [{"name": "S3Uri", "shape": "S3Uri", "type": "string"}], + "type": "structure", + }, "AbsoluteBorrowLimitResourceList": { "member_shape": "ComputeQuotaResourceConfig", "member_type": "structure", @@ -1706,6 
+2041,7 @@ {"name": "EventTime", "shape": "Timestamp", "type": "timestamp"}, {"name": "EventDetails", "shape": "EventDetails", "type": "structure"}, {"name": "Description", "shape": "String", "type": "string"}, + {"name": "EventLevel", "shape": "ClusterEventLevel", "type": "string"}, ], "type": "structure", }, @@ -1724,6 +2060,7 @@ {"name": "ResourceType", "shape": "ClusterEventResourceType", "type": "string"}, {"name": "EventTime", "shape": "Timestamp", "type": "timestamp"}, {"name": "Description", "shape": "String", "type": "string"}, + {"name": "EventLevel", "shape": "ClusterEventLevel", "type": "string"}, ], "type": "structure", }, @@ -1779,6 +2116,7 @@ }, {"name": "CurrentImageId", "shape": "ImageId", "type": "string"}, {"name": "DesiredImageId", "shape": "ImageId", "type": "string"}, + {"name": "ImageVersionStatus", "shape": "ClusterImageVersionStatus", "type": "string"}, {"name": "ActiveOperations", "shape": "ActiveOperations", "type": "map"}, { "name": "KubernetesConfig", @@ -2012,6 +2350,7 @@ {"name": "Placement", "shape": "ClusterInstancePlacement", "type": "structure"}, {"name": "CurrentImageId", "shape": "ImageId", "type": "string"}, {"name": "DesiredImageId", "shape": "ImageId", "type": "string"}, + {"name": "ImageVersionStatus", "shape": "ClusterImageVersionStatus", "type": "string"}, {"name": "UltraServerInfo", "shape": "UltraServerInfo", "type": "structure"}, { "name": "KubernetesConfig", @@ -2053,6 +2392,7 @@ }, {"name": "UltraServerInfo", "shape": "UltraServerInfo", "type": "structure"}, {"name": "PrivateDnsHostname", "shape": "ClusterPrivateDnsHostname", "type": "string"}, + {"name": "ImageVersionStatus", "shape": "ClusterImageVersionStatus", "type": "string"}, ], "type": "structure", }, @@ -2520,6 +2860,78 @@ ], "type": "structure", }, + "CreateAIBenchmarkJobRequest": { + "members": [ + {"name": "AIBenchmarkJobName", "shape": "AIEntityName", "type": "string"}, + {"name": "BenchmarkTarget", "shape": "AIBenchmarkTarget", "type": "structure"}, + {"name": "OutputConfig", "shape": "AIBenchmarkOutputConfig", "type": "structure"}, + { + "name": "AIWorkloadConfigIdentifier", + "shape": "AIResourceIdentifier", + "type": "string", + }, + {"name": "RoleArn", "shape": "RoleArn", "type": "string"}, + {"name": "NetworkConfig", "shape": "AIBenchmarkNetworkConfig", "type": "structure"}, + {"name": "Tags", "shape": "TagList", "type": "list"}, + ], + "type": "structure", + }, + "CreateAIBenchmarkJobResponse": { + "members": [{"name": "AIBenchmarkJobArn", "shape": "AIBenchmarkJobArn", "type": "string"}], + "type": "structure", + }, + "CreateAIRecommendationJobRequest": { + "members": [ + {"name": "AIRecommendationJobName", "shape": "AIEntityName", "type": "string"}, + {"name": "ModelSource", "shape": "AIModelSource", "type": "structure"}, + {"name": "OutputConfig", "shape": "AIRecommendationOutputConfig", "type": "structure"}, + { + "name": "AIWorkloadConfigIdentifier", + "shape": "AIResourceIdentifier", + "type": "string", + }, + { + "name": "PerformanceTarget", + "shape": "AIRecommendationPerformanceTarget", + "type": "structure", + }, + {"name": "RoleArn", "shape": "RoleArn", "type": "string"}, + { + "name": "InferenceSpecification", + "shape": "AIRecommendationInferenceSpecification", + "type": "structure", + }, + { + "name": "OptimizeModel", + "shape": "AIRecommendationAllowOptimization", + "type": "boolean", + }, + {"name": "ComputeSpec", "shape": "AIRecommendationComputeSpec", "type": "structure"}, + {"name": "Tags", "shape": "TagList", "type": "list"}, + ], + "type": "structure", 
+ }, + "CreateAIRecommendationJobResponse": { + "members": [ + {"name": "AIRecommendationJobArn", "shape": "AIRecommendationJobArn", "type": "string"} + ], + "type": "structure", + }, + "CreateAIWorkloadConfigRequest": { + "members": [ + {"name": "AIWorkloadConfigName", "shape": "AIEntityName", "type": "string"}, + {"name": "DatasetConfig", "shape": "AIDatasetConfig", "type": "structure"}, + {"name": "AIWorkloadConfigs", "shape": "AIWorkloadConfigs", "type": "structure"}, + {"name": "Tags", "shape": "TagList", "type": "list"}, + ], + "type": "structure", + }, + "CreateAIWorkloadConfigResponse": { + "members": [ + {"name": "AIWorkloadConfigArn", "shape": "AIWorkloadConfigArn", "type": "string"} + ], + "type": "structure", + }, "CreateActionRequest": { "members": [ {"name": "ActionName", "shape": "ExperimentEntityName", "type": "string"}, @@ -3155,6 +3567,11 @@ "shape": "InferenceComponentSpecification", "type": "structure", }, + { + "name": "Specifications", + "shape": "InferenceComponentSpecificationList", + "type": "list", + }, { "name": "RuntimeConfig", "shape": "InferenceComponentRuntimeConfig", @@ -3425,6 +3842,7 @@ "type": "string", }, {"name": "Tags", "shape": "TagList", "type": "list"}, + {"name": "ManagedConfiguration", "shape": "ManagedConfiguration", "type": "structure"}, ], "type": "structure", }, @@ -3480,6 +3898,7 @@ {"name": "SecurityConfig", "shape": "ModelPackageSecurityConfig", "type": "structure"}, {"name": "ModelCard", "shape": "ModelPackageModelCard", "type": "structure"}, {"name": "ModelLifeCycle", "shape": "ModelLifeCycle", "type": "structure"}, + {"name": "ManagedStorageType", "shape": "ManagedStorageType", "type": "string"}, ], "type": "structure", }, @@ -4391,6 +4810,34 @@ ], "type": "structure", }, + "DeleteAIBenchmarkJobRequest": { + "members": [{"name": "AIBenchmarkJobName", "shape": "AIEntityName", "type": "string"}], + "type": "structure", + }, + "DeleteAIBenchmarkJobResponse": { + "members": [{"name": "AIBenchmarkJobArn", "shape": "AIBenchmarkJobArn", "type": "string"}], + "type": "structure", + }, + "DeleteAIRecommendationJobRequest": { + "members": [{"name": "AIRecommendationJobName", "shape": "AIEntityName", "type": "string"}], + "type": "structure", + }, + "DeleteAIRecommendationJobResponse": { + "members": [ + {"name": "AIRecommendationJobArn", "shape": "AIRecommendationJobArn", "type": "string"} + ], + "type": "structure", + }, + "DeleteAIWorkloadConfigRequest": { + "members": [{"name": "AIWorkloadConfigName", "shape": "AIEntityName", "type": "string"}], + "type": "structure", + }, + "DeleteAIWorkloadConfigResponse": { + "members": [ + {"name": "AIWorkloadConfigArn", "shape": "AIWorkloadConfigArn", "type": "string"} + ], + "type": "structure", + }, "DeleteActionRequest": { "members": [{"name": "ActionName", "shape": "ExperimentEntityName", "type": "string"}], "type": "structure", @@ -4894,6 +5341,93 @@ ], "type": "structure", }, + "DescribeAIBenchmarkJobRequest": { + "members": [{"name": "AIBenchmarkJobName", "shape": "AIEntityName", "type": "string"}], + "type": "structure", + }, + "DescribeAIBenchmarkJobResponse": { + "members": [ + {"name": "AIBenchmarkJobName", "shape": "AIEntityName", "type": "string"}, + {"name": "AIBenchmarkJobArn", "shape": "AIBenchmarkJobArn", "type": "string"}, + {"name": "AIBenchmarkJobStatus", "shape": "AIBenchmarkJobStatus", "type": "string"}, + {"name": "FailureReason", "shape": "FailureReason", "type": "string"}, + {"name": "BenchmarkTarget", "shape": "AIBenchmarkTarget", "type": "structure"}, + {"name": "OutputConfig", 
"shape": "AIBenchmarkOutputResult", "type": "structure"}, + { + "name": "AIWorkloadConfigIdentifier", + "shape": "AIResourceIdentifier", + "type": "string", + }, + {"name": "RoleArn", "shape": "RoleArn", "type": "string"}, + {"name": "NetworkConfig", "shape": "AIBenchmarkNetworkConfig", "type": "structure"}, + {"name": "CreationTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "StartTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "EndTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "Tags", "shape": "TagList", "type": "list"}, + ], + "type": "structure", + }, + "DescribeAIRecommendationJobRequest": { + "members": [{"name": "AIRecommendationJobName", "shape": "AIEntityName", "type": "string"}], + "type": "structure", + }, + "DescribeAIRecommendationJobResponse": { + "members": [ + {"name": "AIRecommendationJobName", "shape": "AIEntityName", "type": "string"}, + {"name": "AIRecommendationJobArn", "shape": "AIRecommendationJobArn", "type": "string"}, + { + "name": "AIRecommendationJobStatus", + "shape": "AIRecommendationJobStatus", + "type": "string", + }, + {"name": "FailureReason", "shape": "FailureReason", "type": "string"}, + {"name": "ModelSource", "shape": "AIModelSource", "type": "structure"}, + {"name": "OutputConfig", "shape": "AIRecommendationOutputResult", "type": "structure"}, + { + "name": "InferenceSpecification", + "shape": "AIRecommendationInferenceSpecification", + "type": "structure", + }, + { + "name": "AIWorkloadConfigIdentifier", + "shape": "AIResourceIdentifier", + "type": "string", + }, + { + "name": "OptimizeModel", + "shape": "AIRecommendationAllowOptimization", + "type": "boolean", + }, + { + "name": "PerformanceTarget", + "shape": "AIRecommendationPerformanceTarget", + "type": "structure", + }, + {"name": "Recommendations", "shape": "AIRecommendationList", "type": "list"}, + {"name": "RoleArn", "shape": "RoleArn", "type": "string"}, + {"name": "ComputeSpec", "shape": "AIRecommendationComputeSpec", "type": "structure"}, + {"name": "CreationTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "StartTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "EndTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "Tags", "shape": "TagList", "type": "list"}, + ], + "type": "structure", + }, + "DescribeAIWorkloadConfigRequest": { + "members": [{"name": "AIWorkloadConfigName", "shape": "AIEntityName", "type": "string"}], + "type": "structure", + }, + "DescribeAIWorkloadConfigResponse": { + "members": [ + {"name": "AIWorkloadConfigName", "shape": "AIEntityName", "type": "string"}, + {"name": "AIWorkloadConfigArn", "shape": "AIWorkloadConfigArn", "type": "string"}, + {"name": "DatasetConfig", "shape": "AIDatasetConfig", "type": "structure"}, + {"name": "AIWorkloadConfigs", "shape": "AIWorkloadConfigs", "type": "structure"}, + {"name": "Tags", "shape": "TagList", "type": "list"}, + {"name": "CreationTime", "shape": "Timestamp", "type": "timestamp"}, + ], + "type": "structure", + }, "DescribeActionRequest": { "members": [{"name": "ActionName", "shape": "ExperimentEntityNameOrArn", "type": "string"}], "type": "structure", @@ -5888,6 +6422,11 @@ "shape": "InferenceComponentSpecificationSummary", "type": "structure", }, + { + "name": "Specifications", + "shape": "InferenceComponentSpecificationSummaryList", + "type": "list", + }, { "name": "RuntimeConfig", "shape": "InferenceComponentRuntimeConfigSummary", @@ -6262,6 +6801,7 @@ "shape": "ModelPackageGroupStatus", "type": "string", }, + {"name": "ManagedConfiguration", 
"shape": "ManagedConfiguration", "type": "structure"}, ], "type": "structure", }, @@ -6326,6 +6866,7 @@ {"name": "SecurityConfig", "shape": "ModelPackageSecurityConfig", "type": "structure"}, {"name": "ModelCard", "shape": "ModelPackageModelCard", "type": "structure"}, {"name": "ModelLifeCycle", "shape": "ModelLifeCycle", "type": "structure"}, + {"name": "ManagedStorageType", "shape": "ManagedStorageType", "type": "string"}, ], "type": "structure", }, @@ -7774,6 +8315,11 @@ "type": "structure", }, "ExecutionRoleArns": {"member_shape": "RoleArn", "member_type": "string", "type": "list"}, + "ExpectedPerformanceList": { + "member_shape": "AIRecommendationPerformanceMetric", + "member_type": "structure", + "type": "list", + }, "Experiment": { "members": [ {"name": "ExperimentName", "shape": "ExperimentEntityName", "type": "string"}, @@ -9004,6 +9550,18 @@ "members": [{"name": "Arn", "shape": "String2048", "type": "string"}], "type": "structure", }, + "InferenceComponentPlacementStatus": { + "members": [ + {"name": "InstanceType", "shape": "ProductionVariantInstanceType", "type": "string"}, + {"name": "CurrentCopyCount", "shape": "InferenceComponentCopyCount", "type": "integer"}, + ], + "type": "structure", + }, + "InferenceComponentPlacementStatusList": { + "member_shape": "InferenceComponentPlacementStatus", + "member_type": "structure", + "type": "list", + }, "InferenceComponentRollingUpdatePolicy": { "members": [ { @@ -9035,6 +9593,11 @@ "members": [ {"name": "DesiredCopyCount", "shape": "InferenceComponentCopyCount", "type": "integer"}, {"name": "CurrentCopyCount", "shape": "InferenceComponentCopyCount", "type": "integer"}, + { + "name": "PlacementStatus", + "shape": "InferenceComponentPlacementStatusList", + "type": "list", + }, ], "type": "structure", }, @@ -9055,6 +9618,7 @@ }, "InferenceComponentSpecification": { "members": [ + {"name": "InstanceType", "shape": "ProductionVariantInstanceType", "type": "string"}, {"name": "ModelName", "shape": "ModelName", "type": "string"}, { "name": "Container", @@ -9089,8 +9653,14 @@ ], "type": "structure", }, + "InferenceComponentSpecificationList": { + "member_shape": "InferenceComponentSpecification", + "member_type": "structure", + "type": "list", + }, "InferenceComponentSpecificationSummary": { "members": [ + {"name": "InstanceType", "shape": "ProductionVariantInstanceType", "type": "string"}, {"name": "ModelName", "shape": "ModelName", "type": "string"}, { "name": "Container", @@ -9125,6 +9695,11 @@ ], "type": "structure", }, + "InferenceComponentSpecificationSummaryList": { + "member_shape": "InferenceComponentSpecificationSummary", + "member_type": "structure", + "type": "list", + }, "InferenceComponentStartupParameters": { "members": [ { @@ -9353,6 +9928,11 @@ "members": [ {"name": "CustomerEni", "shape": "String", "type": "string"}, {"name": "AdditionalEnis", "shape": "AdditionalEnis", "type": "structure"}, + { + "name": "InstanceRequirementsEniConfigurations", + "shape": "InstanceRequirementsEniConfigurations", + "type": "list", + }, {"name": "CapacityReservation", "shape": "CapacityReservation", "type": "structure"}, {"name": "FailureMessage", "shape": "String", "type": "string"}, {"name": "LcsExecutionState", "shape": "String", "type": "string"}, @@ -9377,6 +9957,43 @@ ], "type": "structure", }, + "InstancePool": { + "members": [ + {"name": "InstanceType", "shape": "ProductionVariantInstanceType", "type": "string"}, + {"name": "ModelNameOverride", "shape": "ModelName", "type": "string"}, + {"name": "Priority", "shape": 
"InstancePoolPriority", "type": "integer"}, + ], + "type": "structure", + }, + "InstancePoolList": { + "member_shape": "InstancePool", + "member_type": "structure", + "type": "list", + }, + "InstancePoolSummary": { + "members": [ + {"name": "InstanceType", "shape": "ProductionVariantInstanceType", "type": "string"}, + {"name": "CurrentInstanceCount", "shape": "TaskCount", "type": "integer"}, + ], + "type": "structure", + }, + "InstancePoolSummaryList": { + "member_shape": "InstancePoolSummary", + "member_type": "structure", + "type": "list", + }, + "InstanceRequirementsEniConfiguration": { + "members": [ + {"name": "CustomerEni", "shape": "String", "type": "string"}, + {"name": "AdditionalEnis", "shape": "AdditionalEnis", "type": "structure"}, + ], + "type": "structure", + }, + "InstanceRequirementsEniConfigurations": { + "member_shape": "InstanceRequirementsEniConfiguration", + "member_type": "structure", + "type": "list", + }, "IntegerParameterRange": { "members": [ {"name": "Name", "shape": "ParameterKey", "type": "string"}, @@ -9769,6 +10386,69 @@ ], "type": "structure", }, + "ListAIBenchmarkJobsRequest": { + "members": [ + {"name": "MaxResults", "shape": "MaxResults", "type": "integer"}, + {"name": "NextToken", "shape": "NextToken", "type": "string"}, + {"name": "NameContains", "shape": "NameContains", "type": "string"}, + {"name": "StatusEquals", "shape": "AIBenchmarkJobStatus", "type": "string"}, + {"name": "CreationTimeAfter", "shape": "Timestamp", "type": "timestamp"}, + {"name": "CreationTimeBefore", "shape": "Timestamp", "type": "timestamp"}, + {"name": "SortBy", "shape": "ListAIBenchmarkJobsSortBy", "type": "string"}, + {"name": "SortOrder", "shape": "SortOrder", "type": "string"}, + ], + "type": "structure", + }, + "ListAIBenchmarkJobsResponse": { + "members": [ + {"name": "AIBenchmarkJobs", "shape": "AIBenchmarkJobSummaryList", "type": "list"}, + {"name": "NextToken", "shape": "NextToken", "type": "string"}, + ], + "type": "structure", + }, + "ListAIRecommendationJobsRequest": { + "members": [ + {"name": "MaxResults", "shape": "MaxResults", "type": "integer"}, + {"name": "NextToken", "shape": "NextToken", "type": "string"}, + {"name": "NameContains", "shape": "NameContains", "type": "string"}, + {"name": "StatusEquals", "shape": "AIRecommendationJobStatus", "type": "string"}, + {"name": "CreationTimeAfter", "shape": "Timestamp", "type": "timestamp"}, + {"name": "CreationTimeBefore", "shape": "Timestamp", "type": "timestamp"}, + {"name": "SortBy", "shape": "ListAIRecommendationJobsSortBy", "type": "string"}, + {"name": "SortOrder", "shape": "SortOrder", "type": "string"}, + ], + "type": "structure", + }, + "ListAIRecommendationJobsResponse": { + "members": [ + { + "name": "AIRecommendationJobs", + "shape": "AIRecommendationJobSummaryList", + "type": "list", + }, + {"name": "NextToken", "shape": "NextToken", "type": "string"}, + ], + "type": "structure", + }, + "ListAIWorkloadConfigsRequest": { + "members": [ + {"name": "MaxResults", "shape": "MaxResults", "type": "integer"}, + {"name": "NextToken", "shape": "NextToken", "type": "string"}, + {"name": "NameContains", "shape": "NameContains", "type": "string"}, + {"name": "CreationTimeAfter", "shape": "Timestamp", "type": "timestamp"}, + {"name": "CreationTimeBefore", "shape": "Timestamp", "type": "timestamp"}, + {"name": "SortBy", "shape": "ListAIWorkloadConfigsSortBy", "type": "string"}, + {"name": "SortOrder", "shape": "SortOrder", "type": "string"}, + ], + "type": "structure", + }, + "ListAIWorkloadConfigsResponse": { + 
"members": [ + {"name": "AIWorkloadConfigs", "shape": "AIWorkloadConfigSummaryList", "type": "list"}, + {"name": "NextToken", "shape": "NextToken", "type": "string"}, + ], + "type": "structure", + }, "ListActionsRequest": { "members": [ {"name": "SourceUri", "shape": "SourceUri", "type": "string"}, @@ -11659,6 +12339,12 @@ ], "type": "structure", }, + "ManagedConfiguration": { + "members": [ + {"name": "ManagedStorageType", "shape": "ManagedStorageType", "type": "string"} + ], + "type": "structure", + }, "MapString2048": { "key_shape": "String2048", "key_type": "string", @@ -12300,6 +12986,11 @@ {"name": "Framework", "shape": "String", "type": "string"}, {"name": "FrameworkVersion", "shape": "ModelPackageFrameworkVersion", "type": "string"}, {"name": "NearestModelName", "shape": "String", "type": "string"}, + { + "name": "AdditionalModelDataSources", + "shape": "AdditionalModelDataSources", + "type": "list", + }, { "name": "AdditionalS3DataSource", "shape": "AdditionalS3DataSource", @@ -12351,6 +13042,7 @@ "shape": "ModelPackageGroupStatus", "type": "string", }, + {"name": "ManagedConfiguration", "shape": "ManagedConfiguration", "type": "structure"}, ], "type": "structure", }, @@ -13474,6 +14166,7 @@ {"name": "CurrentInstanceCount", "shape": "TaskCount", "type": "integer"}, {"name": "DesiredInstanceCount", "shape": "TaskCount", "type": "integer"}, {"name": "InstanceType", "shape": "ProductionVariantInstanceType", "type": "string"}, + {"name": "InstancePools", "shape": "InstancePoolSummaryList", "type": "list"}, { "name": "AcceleratorType", "shape": "ProductionVariantAcceleratorType", @@ -13982,6 +14675,12 @@ {"name": "ModelName", "shape": "ModelName", "type": "string"}, {"name": "InitialInstanceCount", "shape": "InitialTaskCount", "type": "integer"}, {"name": "InstanceType", "shape": "ProductionVariantInstanceType", "type": "string"}, + {"name": "InstancePools", "shape": "InstancePoolList", "type": "list"}, + { + "name": "VariantInstanceProvisionTimeoutInSeconds", + "shape": "VariantInstanceProvisionTimeoutInSeconds", + "type": "integer", + }, {"name": "InitialVariantWeight", "shape": "VariantWeight", "type": "float"}, { "name": "AcceleratorType", @@ -14168,6 +14867,7 @@ {"name": "DesiredWeight", "shape": "VariantWeight", "type": "float"}, {"name": "CurrentInstanceCount", "shape": "TaskCount", "type": "integer"}, {"name": "DesiredInstanceCount", "shape": "TaskCount", "type": "integer"}, + {"name": "InstancePools", "shape": "InstancePoolSummaryList", "type": "list"}, {"name": "VariantStatus", "shape": "ProductionVariantStatusList", "type": "list"}, { "name": "CurrentServerlessConfig", @@ -14481,7 +15181,7 @@ }, "RealTimeInferenceConfig": { "members": [ - {"name": "InstanceType", "shape": "InstanceType", "type": "string"}, + {"name": "InstanceType", "shape": "ProductionVariantInstanceType", "type": "string"}, {"name": "InstanceCount", "shape": "TaskCount", "type": "integer"}, ], "type": "structure", @@ -14910,6 +15610,11 @@ {"name": "SageMakerImageVersionAlias", "shape": "ImageVersionAlias", "type": "string"}, {"name": "InstanceType", "shape": "AppInstanceType", "type": "string"}, {"name": "LifecycleConfigArn", "shape": "StudioLifecycleConfigArn", "type": "string"}, + { + "name": "TrainingPlanArn", + "shape": "StudioResourceSpecTrainingPlanArn", + "type": "string", + }, ], "type": "structure", }, @@ -15625,6 +16330,24 @@ "value_shape": "SchedulerResourceStatus", "value_type": "string", }, + "StopAIBenchmarkJobRequest": { + "members": [{"name": "AIBenchmarkJobName", "shape": "AIEntityName", 
"type": "string"}], + "type": "structure", + }, + "StopAIBenchmarkJobResponse": { + "members": [{"name": "AIBenchmarkJobArn", "shape": "AIBenchmarkJobArn", "type": "string"}], + "type": "structure", + }, + "StopAIRecommendationJobRequest": { + "members": [{"name": "AIRecommendationJobName", "shape": "AIEntityName", "type": "string"}], + "type": "structure", + }, + "StopAIRecommendationJobResponse": { + "members": [ + {"name": "AIRecommendationJobArn", "shape": "AIRecommendationJobArn", "type": "string"} + ], + "type": "structure", + }, "StopAutoMLJobRequest": { "members": [{"name": "AutoMLJobName", "shape": "AutoMLJobName", "type": "string"}], "type": "structure", @@ -15782,6 +16505,11 @@ "shape": "HiddenSageMakerImageVersionAliasesList", "type": "list", }, + { + "name": "ExecutionRoleSessionNameMode", + "shape": "ExecutionRoleSessionNameMode", + "type": "string", + }, ], "type": "structure", }, @@ -16104,6 +16832,7 @@ {"name": "InputDataConfig", "shape": "InputDataConfig", "type": "list"}, {"name": "OutputDataConfig", "shape": "OutputDataConfig", "type": "structure"}, {"name": "ResourceConfig", "shape": "ResourceConfig", "type": "structure"}, + {"name": "WarmPoolStatus", "shape": "WarmPoolStatus", "type": "structure"}, {"name": "VpcConfig", "shape": "VpcConfig", "type": "structure"}, {"name": "StoppingCondition", "shape": "StoppingCondition", "type": "structure"}, {"name": "CreationTime", "shape": "Timestamp", "type": "timestamp"}, @@ -17209,6 +17938,11 @@ "shape": "InferenceComponentSpecification", "type": "structure", }, + { + "name": "Specifications", + "shape": "InferenceComponentSpecificationList", + "type": "list", + }, { "name": "RuntimeConfig", "shape": "InferenceComponentRuntimeConfig", @@ -17933,6 +18667,10 @@ "type": "structure", }, "Workforces": {"member_shape": "Workforce", "member_type": "structure", "type": "list"}, + "WorkloadSpec": { + "members": [{"name": "Inline", "shape": "String", "type": "string"}], + "type": "structure", + }, "WorkspaceSettings": { "members": [ {"name": "S3ArtifactPath", "shape": "S3Uri", "type": "string"}, diff --git a/src/sagemaker_core/main/config_schema.py b/src/sagemaker_core/main/config_schema.py index 47fa38e..8d481dd 100644 --- a/src/sagemaker_core/main/config_schema.py +++ b/src/sagemaker_core/main/config_schema.py @@ -16,6 +16,33 @@ "Resources": { "type": "object", "properties": { + "AIBenchmarkJob": { + "type": "object", + "properties": { + "output_config": {"s3_output_location": {"type": "string"}}, + "role_arn": {"type": "string"}, + "network_config": { + "vpc_config": { + "security_group_ids": { + "type": "array", + "items": {"type": "string"}, + }, + "subnets": { + "type": "array", + "items": {"type": "string"}, + }, + } + }, + }, + }, + "AIRecommendationJob": { + "type": "object", + "properties": { + "model_source": {"s3": {"s3_uri": {"type": "string"}}}, + "output_config": {"s3_output_location": {"type": "string"}}, + "role_arn": {"type": "string"}, + }, + }, "Algorithm": { "type": "object", "properties": { @@ -234,6 +261,11 @@ }, } }, + "studio_web_portal_settings": { + "execution_role_session_name_mode": { + "type": "string" + } + }, }, "domain_settings": { "security_group_ids": { @@ -904,6 +936,11 @@ }, } }, + "studio_web_portal_settings": { + "execution_role_session_name_mode": { + "type": "string" + } + }, } }, }, diff --git a/src/sagemaker_core/main/resources.py b/src/sagemaker_core/main/resources.py index 013bf07..30282c9 100644 --- a/src/sagemaker_core/main/resources.py +++ b/src/sagemaker_core/main/resources.py @@ -142,6 
+142,1148 @@ def wrapper(*args, **kwargs): return wrapper +class AIBenchmarkJob(Base): + """ + Class representing resource AIBenchmarkJob + + Attributes: + ai_benchmark_job_name: The name of the AI benchmark job. + ai_benchmark_job_arn: The Amazon Resource Name (ARN) of the AI benchmark job. + ai_benchmark_job_status: The status of the AI benchmark job. + benchmark_target: The target endpoint that was benchmarked. + output_config: The output configuration for the benchmark job, including the Amazon S3 output location and CloudWatch log information. + ai_workload_config_identifier: The name or Amazon Resource Name (ARN) of the AI workload configuration used for this benchmark job. + role_arn: The Amazon Resource Name (ARN) of the IAM role used by the benchmark job. + creation_time: A timestamp that indicates when the benchmark job was created. + failure_reason: If the benchmark job failed, the reason it failed. + network_config: The network configuration for the benchmark job. + start_time: A timestamp that indicates when the benchmark job started running. + end_time: A timestamp that indicates when the benchmark job completed. + tags: The tags associated with the benchmark job. + + """ + + ai_benchmark_job_name: str + ai_benchmark_job_arn: Optional[str] = Unassigned() + ai_benchmark_job_status: Optional[str] = Unassigned() + failure_reason: Optional[str] = Unassigned() + benchmark_target: Optional[shapes.AIBenchmarkTarget] = Unassigned() + output_config: Optional[shapes.AIBenchmarkOutputResult] = Unassigned() + ai_workload_config_identifier: Optional[str] = Unassigned() + role_arn: Optional[str] = Unassigned() + network_config: Optional[shapes.AIBenchmarkNetworkConfig] = Unassigned() + creation_time: Optional[datetime.datetime] = Unassigned() + start_time: Optional[datetime.datetime] = Unassigned() + end_time: Optional[datetime.datetime] = Unassigned() + tags: Optional[List[shapes.Tag]] = Unassigned() + + def get_name(self) -> str: + attributes = vars(self) + resource_name = "ai_benchmark_job_name" + resource_name_split = resource_name.split("_") + attribute_name_candidates = [] + + l = len(resource_name_split) + for i in range(0, l): + attribute_name_candidates.append("_".join(resource_name_split[i:l])) + + for attribute, value in attributes.items(): + if attribute == "name" or attribute in attribute_name_candidates: + return value + logger.error("Name attribute not found for object ai_benchmark_job") + return None + + @classmethod + @Base.add_validate_call + def create( + cls, + ai_benchmark_job_name: str, + benchmark_target: shapes.AIBenchmarkTarget, + output_config: shapes.AIBenchmarkOutputConfig, + ai_workload_config_identifier: str, + role_arn: str, + network_config: Optional[shapes.AIBenchmarkNetworkConfig] = Unassigned(), + tags: Optional[List[shapes.Tag]] = Unassigned(), + session: Optional[Session] = None, + region: Optional[str] = None, + ) -> Optional["AIBenchmarkJob"]: + """ + Create a AIBenchmarkJob resource + + Parameters: + ai_benchmark_job_name: The name of the AI benchmark job. The name must be unique within your Amazon Web Services account in the current Amazon Web Services Region. + benchmark_target: The target endpoint to benchmark. Specify a SageMaker endpoint by providing its name or Amazon Resource Name (ARN). + output_config: The output configuration for the benchmark job, including the Amazon S3 location where benchmark results are stored. 
+ ai_workload_config_identifier: The name or Amazon Resource Name (ARN) of the AI workload configuration to use for this benchmark job. + role_arn: The Amazon Resource Name (ARN) of an IAM role that enables Amazon SageMaker AI to perform tasks on your behalf. + network_config: The network configuration for the benchmark job, including VPC settings. + tags: The metadata that you apply to Amazon Web Services resources to help you categorize and organize them. Each tag consists of a key and a value, both of which you define. + session: Boto3 session. + region: Region name. + + Returns: + The AIBenchmarkJob resource. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceInUse: Resource being accessed is in use. + ResourceLimitExceeded: You have exceeded an SageMaker resource limit. For example, you might have too many training jobs created. + ResourceNotFound: Resource being access is not found. + ConfigSchemaValidationError: Raised when a configuration file does not adhere to the schema + LocalConfigNotFoundError: Raised when a configuration file is not found in local file system + S3ConfigNotFoundError: Raised when a configuration file is not found in S3 + """ + + logger.info("Creating ai_benchmark_job resource.") + client = Base.get_sagemaker_client( + session=session, region_name=region, service_name="sagemaker" + ) + + operation_input_args = { + "AIBenchmarkJobName": ai_benchmark_job_name, + "BenchmarkTarget": benchmark_target, + "OutputConfig": output_config, + "AIWorkloadConfigIdentifier": ai_workload_config_identifier, + "RoleArn": role_arn, + "NetworkConfig": network_config, + "Tags": tags, + } + + operation_input_args = Base.populate_chained_attributes( + resource_name="AIBenchmarkJob", operation_input_args=operation_input_args + ) + + logger.debug(f"Input request: {operation_input_args}") + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + # create the resource + response = client.create_ai_benchmark_job(**operation_input_args) + logger.debug(f"Response: {response}") + + return cls.get(ai_benchmark_job_name=ai_benchmark_job_name, session=session, region=region) + + @classmethod + @Base.add_validate_call + def get( + cls, + ai_benchmark_job_name: str, + session: Optional[Session] = None, + region: Optional[str] = None, + ) -> Optional["AIBenchmarkJob"]: + """ + Get a AIBenchmarkJob resource + + Parameters: + ai_benchmark_job_name: The name of the AI benchmark job to describe. + session: Boto3 session. + region: Region name. + + Returns: + The AIBenchmarkJob resource. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. 
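A minimal retrieval sketch for the describe call documented above; the job name below is a hypothetical placeholder:
```
from sagemaker_core.main.resources import AIBenchmarkJob

# Describe an existing benchmark job by name and inspect the fields
# populated from the DescribeAIBenchmarkJob response.
job = AIBenchmarkJob.get(ai_benchmark_job_name="my-benchmark-job")
print(job.ai_benchmark_job_status, job.ai_benchmark_job_arn)
```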
+ """ + + operation_input_args = { + "AIBenchmarkJobName": ai_benchmark_job_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client = Base.get_sagemaker_client( + session=session, region_name=region, service_name="sagemaker" + ) + response = client.describe_ai_benchmark_job(**operation_input_args) + + logger.debug(response) + + # deserialize the response + transformed_response = transform(response, "DescribeAIBenchmarkJobResponse") + ai_benchmark_job = cls(**transformed_response) + return ai_benchmark_job + + @Base.add_validate_call + def refresh( + self, + ) -> Optional["AIBenchmarkJob"]: + """ + Refresh a AIBenchmarkJob resource + + Returns: + The AIBenchmarkJob resource. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. + """ + + operation_input_args = { + "AIBenchmarkJobName": self.ai_benchmark_job_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client = Base.get_sagemaker_client() + response = client.describe_ai_benchmark_job(**operation_input_args) + + # deserialize response and update self + transform(response, "DescribeAIBenchmarkJobResponse", self) + return self + + @Base.add_validate_call + def delete( + self, + ) -> None: + """ + Delete a AIBenchmarkJob resource + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. + """ + + client = Base.get_sagemaker_client() + + operation_input_args = { + "AIBenchmarkJobName": self.ai_benchmark_job_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client.delete_ai_benchmark_job(**operation_input_args) + + logger.info(f"Deleting {self.__class__.__name__} - {self.get_name()}") + + @Base.add_validate_call + def stop(self) -> None: + """ + Stop a AIBenchmarkJob resource + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. 
+ """ + + client = SageMakerClient().client + + operation_input_args = { + "AIBenchmarkJobName": self.ai_benchmark_job_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client.stop_ai_benchmark_job(**operation_input_args) + + logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}") + + @Base.add_validate_call + def wait( + self, + poll: int = 5, + timeout: Optional[int] = None, + ) -> None: + """ + Wait for a AIBenchmarkJob resource. + + Parameters: + poll: The number of seconds to wait between each poll. + timeout: The maximum number of seconds to wait before timing out. + + Raises: + TimeoutExceededError: If the resource does not reach a terminal state before the timeout. + FailedStatusError: If the resource reaches a failed state. + WaiterError: Raised when an error occurs while waiting. + + """ + terminal_states = ["Completed", "Failed", "Stopped"] + start_time = time.time() + + progress = Progress( + SpinnerColumn("bouncingBar"), + TextColumn("{task.description}"), + TimeElapsedColumn(), + ) + progress.add_task("Waiting for AIBenchmarkJob...") + status = Status("Current status:") + + with Live( + Panel( + Group(progress, status), + title="Wait Log Panel", + border_style=Style(color=Color.BLUE.value), + ), + transient=True, + ): + while True: + self.refresh() + current_status = self.ai_benchmark_job_status + status.update(f"Current status: [bold]{current_status}") + + if current_status in terminal_states: + logger.info(f"Final Resource Status: [bold]{current_status}") + + if "failed" in current_status.lower(): + raise FailedStatusError( + resource_type="AIBenchmarkJob", + status=current_status, + reason=self.failure_reason, + ) + + return + + if timeout is not None and time.time() - start_time >= timeout: + raise TimeoutExceededError(resouce_type="AIBenchmarkJob", status=current_status) + time.sleep(poll) + + @classmethod + @Base.add_validate_call + def get_all( + cls, + name_contains: Optional[str] = Unassigned(), + status_equals: Optional[str] = Unassigned(), + creation_time_after: Optional[datetime.datetime] = Unassigned(), + creation_time_before: Optional[datetime.datetime] = Unassigned(), + sort_by: Optional[str] = Unassigned(), + sort_order: Optional[str] = Unassigned(), + session: Optional[Session] = None, + region: Optional[str] = None, + ) -> ResourceIterator["AIBenchmarkJob"]: + """ + Get all AIBenchmarkJob resources + + Parameters: + max_results: The maximum number of benchmark jobs to return in the response. + next_token: If the previous call to ListAIBenchmarkJobs didn't return the full set of jobs, the call returns a token for getting the next set. + name_contains: A string in the job name. This filter returns only jobs whose name contains the specified string. + status_equals: A filter that returns only benchmark jobs with the specified status. + creation_time_after: A filter that returns only jobs created after the specified time. + creation_time_before: A filter that returns only jobs created before the specified time. + sort_by: The field to sort results by. The default is CreationTime. + sort_order: The sort order for results. The default is Descending. + session: Boto3 session. + region: Region name. + + Returns: + Iterator for listed AIBenchmarkJob resources. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. 
+ The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + """ + + client = Base.get_sagemaker_client( + session=session, region_name=region, service_name="sagemaker" + ) + + operation_input_args = { + "NameContains": name_contains, + "StatusEquals": status_equals, + "CreationTimeAfter": creation_time_after, + "CreationTimeBefore": creation_time_before, + "SortBy": sort_by, + "SortOrder": sort_order, + } + + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + return ResourceIterator( + client=client, + list_method="list_ai_benchmark_jobs", + summaries_key="AIBenchmarkJobs", + summary_name="AIBenchmarkJobSummary", + resource_cls=AIBenchmarkJob, + list_method_kwargs=operation_input_args, + ) + + +class AIRecommendationJob(Base): + """ + Class representing resource AIRecommendationJob + + Attributes: + ai_recommendation_job_name: The name of the AI recommendation job. + ai_recommendation_job_arn: The Amazon Resource Name (ARN) of the AI recommendation job. + ai_recommendation_job_status: The status of the AI recommendation job. + model_source: The source of the model that was analyzed. + output_config: The output configuration for the recommendation job. + ai_workload_config_identifier: The name or Amazon Resource Name (ARN) of the AI workload configuration used for this recommendation job. + role_arn: The Amazon Resource Name (ARN) of the IAM role used by the recommendation job. + creation_time: A timestamp that indicates when the recommendation job was created. + failure_reason: If the recommendation job failed, the reason it failed. + inference_specification: The inference framework configuration. + optimize_model: Whether model optimization techniques were allowed. + performance_target: The performance targets specified for the recommendation job. + recommendations: The list of optimization recommendations generated by the job. Each recommendation includes optimization details, deployment configuration, expected performance metrics, and the associated benchmark job ARN. + compute_spec: The compute resource specification for the recommendation job. + start_time: A timestamp that indicates when the recommendation job started running. + end_time: A timestamp that indicates when the recommendation job completed. + tags: The tags associated with the recommendation job. 
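A hedged creation sketch for this resource, built only from shapes defined later in this patch; all names and ARNs are placeholders, and "ttft-ms" is one of the constraint metrics the patch documents as valid:
```
from sagemaker_core.main import shapes
from sagemaker_core.main.resources import AIRecommendationJob

# Request deployment recommendations for model artifacts in S3, targeting
# time to first token (identifiers below are placeholders).
job = AIRecommendationJob.create(
    ai_recommendation_job_name="my-recommendation-job",
    model_source=shapes.AIModelSource(
        s3=shapes.AIModelSourceS3(s3_uri="s3://my-bucket/model-artifacts/")
    ),
    output_config=shapes.AIRecommendationOutputConfig(
        s3_output_location="s3://my-bucket/recommendation-results/"
    ),
    ai_workload_config_identifier="my-workload-config",
    performance_target=shapes.AIRecommendationPerformanceTarget(
        constraints=[shapes.AIRecommendationConstraint(metric="ttft-ms")]
    ),
    role_arn="arn:aws:iam::111122223333:role/MySageMakerRole",
)
```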
+ + """ + + ai_recommendation_job_name: str + ai_recommendation_job_arn: Optional[str] = Unassigned() + ai_recommendation_job_status: Optional[str] = Unassigned() + failure_reason: Optional[str] = Unassigned() + model_source: Optional[shapes.AIModelSource] = Unassigned() + output_config: Optional[shapes.AIRecommendationOutputResult] = Unassigned() + inference_specification: Optional[shapes.AIRecommendationInferenceSpecification] = Unassigned() + ai_workload_config_identifier: Optional[str] = Unassigned() + optimize_model: Optional[bool] = Unassigned() + performance_target: Optional[shapes.AIRecommendationPerformanceTarget] = Unassigned() + recommendations: Optional[List[shapes.AIRecommendation]] = Unassigned() + role_arn: Optional[str] = Unassigned() + compute_spec: Optional[shapes.AIRecommendationComputeSpec] = Unassigned() + creation_time: Optional[datetime.datetime] = Unassigned() + start_time: Optional[datetime.datetime] = Unassigned() + end_time: Optional[datetime.datetime] = Unassigned() + tags: Optional[List[shapes.Tag]] = Unassigned() + + def get_name(self) -> str: + attributes = vars(self) + resource_name = "ai_recommendation_job_name" + resource_name_split = resource_name.split("_") + attribute_name_candidates = [] + + l = len(resource_name_split) + for i in range(0, l): + attribute_name_candidates.append("_".join(resource_name_split[i:l])) + + for attribute, value in attributes.items(): + if attribute == "name" or attribute in attribute_name_candidates: + return value + logger.error("Name attribute not found for object ai_recommendation_job") + return None + + @classmethod + @Base.add_validate_call + def create( + cls, + ai_recommendation_job_name: str, + model_source: shapes.AIModelSource, + output_config: shapes.AIRecommendationOutputConfig, + ai_workload_config_identifier: str, + performance_target: shapes.AIRecommendationPerformanceTarget, + role_arn: str, + inference_specification: Optional[ + shapes.AIRecommendationInferenceSpecification + ] = Unassigned(), + optimize_model: Optional[bool] = Unassigned(), + compute_spec: Optional[shapes.AIRecommendationComputeSpec] = Unassigned(), + tags: Optional[List[shapes.Tag]] = Unassigned(), + session: Optional[Session] = None, + region: Optional[str] = None, + ) -> Optional["AIRecommendationJob"]: + """ + Create a AIRecommendationJob resource + + Parameters: + ai_recommendation_job_name: The name of the AI recommendation job. The name must be unique within your Amazon Web Services account in the current Amazon Web Services Region. + model_source: The source of the model to optimize. Specify the Amazon S3 location of the model artifacts. + output_config: The output configuration for the recommendation job, including the Amazon S3 location for results and an optional model package group where the optimized model is registered. + ai_workload_config_identifier: The name or Amazon Resource Name (ARN) of the AI workload configuration to use for this recommendation job. + performance_target: The performance targets for the recommendation job. Specify constraints on metrics such as time to first token (ttft-ms), throughput, or cost. + role_arn: The Amazon Resource Name (ARN) of an IAM role that enables Amazon SageMaker AI to perform tasks on your behalf. + inference_specification: The inference framework configuration. Specify the framework (such as LMI or vLLM) for the recommendation job. + optimize_model: Whether to allow model optimization techniques such as quantization, speculative decoding, and kernel tuning. The default is true. 
+ compute_spec: The compute resource specification for the recommendation job. You can specify up to 3 instance types to consider, and optionally provide capacity reservation configuration. + tags: The metadata that you apply to Amazon Web Services resources to help you categorize and organize them. + session: Boto3 session. + region: Region name. + + Returns: + The AIRecommendationJob resource. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceInUse: Resource being accessed is in use. + ResourceLimitExceeded: You have exceeded an SageMaker resource limit. For example, you might have too many training jobs created. + ResourceNotFound: Resource being access is not found. + ConfigSchemaValidationError: Raised when a configuration file does not adhere to the schema + LocalConfigNotFoundError: Raised when a configuration file is not found in local file system + S3ConfigNotFoundError: Raised when a configuration file is not found in S3 + """ + + logger.info("Creating ai_recommendation_job resource.") + client = Base.get_sagemaker_client( + session=session, region_name=region, service_name="sagemaker" + ) + + operation_input_args = { + "AIRecommendationJobName": ai_recommendation_job_name, + "ModelSource": model_source, + "OutputConfig": output_config, + "AIWorkloadConfigIdentifier": ai_workload_config_identifier, + "PerformanceTarget": performance_target, + "RoleArn": role_arn, + "InferenceSpecification": inference_specification, + "OptimizeModel": optimize_model, + "ComputeSpec": compute_spec, + "Tags": tags, + } + + operation_input_args = Base.populate_chained_attributes( + resource_name="AIRecommendationJob", operation_input_args=operation_input_args + ) + + logger.debug(f"Input request: {operation_input_args}") + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + # create the resource + response = client.create_ai_recommendation_job(**operation_input_args) + logger.debug(f"Response: {response}") + + return cls.get( + ai_recommendation_job_name=ai_recommendation_job_name, session=session, region=region + ) + + @classmethod + @Base.add_validate_call + def get( + cls, + ai_recommendation_job_name: str, + session: Optional[Session] = None, + region: Optional[str] = None, + ) -> Optional["AIRecommendationJob"]: + """ + Get a AIRecommendationJob resource + + Parameters: + ai_recommendation_job_name: The name of the AI recommendation job to describe. + session: Boto3 session. + region: Region name. + + Returns: + The AIRecommendationJob resource. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. 
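A short follow-up sketch: describe the job, wait for a terminal state, then walk the generated recommendations. The job name is a placeholder, and the `or []` guard assumes an unset recommendations list is falsy:
```
from sagemaker_core.main.resources import AIRecommendationJob

job = AIRecommendationJob.get(ai_recommendation_job_name="my-recommendation-job")
job.wait(poll=60, timeout=7200)  # raises FailedStatusError on a Failed status

# Each recommendation carries optimization details, a deployment configuration,
# and expected performance metrics (see the AIRecommendation shape in this patch).
for rec in job.recommendations or []:
    print(rec.recommendation_description, rec.ai_benchmark_job_arn)
```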
+ """ + + operation_input_args = { + "AIRecommendationJobName": ai_recommendation_job_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client = Base.get_sagemaker_client( + session=session, region_name=region, service_name="sagemaker" + ) + response = client.describe_ai_recommendation_job(**operation_input_args) + + logger.debug(response) + + # deserialize the response + transformed_response = transform(response, "DescribeAIRecommendationJobResponse") + ai_recommendation_job = cls(**transformed_response) + return ai_recommendation_job + + @Base.add_validate_call + def refresh( + self, + ) -> Optional["AIRecommendationJob"]: + """ + Refresh a AIRecommendationJob resource + + Returns: + The AIRecommendationJob resource. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. + """ + + operation_input_args = { + "AIRecommendationJobName": self.ai_recommendation_job_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client = Base.get_sagemaker_client() + response = client.describe_ai_recommendation_job(**operation_input_args) + + # deserialize response and update self + transform(response, "DescribeAIRecommendationJobResponse", self) + return self + + @Base.add_validate_call + def delete( + self, + ) -> None: + """ + Delete a AIRecommendationJob resource + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. + """ + + client = Base.get_sagemaker_client() + + operation_input_args = { + "AIRecommendationJobName": self.ai_recommendation_job_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client.delete_ai_recommendation_job(**operation_input_args) + + logger.info(f"Deleting {self.__class__.__name__} - {self.get_name()}") + + @Base.add_validate_call + def stop(self) -> None: + """ + Stop a AIRecommendationJob resource + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. 
+ """ + + client = SageMakerClient().client + + operation_input_args = { + "AIRecommendationJobName": self.ai_recommendation_job_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client.stop_ai_recommendation_job(**operation_input_args) + + logger.info(f"Stopping {self.__class__.__name__} - {self.get_name()}") + + @Base.add_validate_call + def wait( + self, + poll: int = 5, + timeout: Optional[int] = None, + ) -> None: + """ + Wait for a AIRecommendationJob resource. + + Parameters: + poll: The number of seconds to wait between each poll. + timeout: The maximum number of seconds to wait before timing out. + + Raises: + TimeoutExceededError: If the resource does not reach a terminal state before the timeout. + FailedStatusError: If the resource reaches a failed state. + WaiterError: Raised when an error occurs while waiting. + + """ + terminal_states = ["Completed", "Failed", "Stopped"] + start_time = time.time() + + progress = Progress( + SpinnerColumn("bouncingBar"), + TextColumn("{task.description}"), + TimeElapsedColumn(), + ) + progress.add_task("Waiting for AIRecommendationJob...") + status = Status("Current status:") + + with Live( + Panel( + Group(progress, status), + title="Wait Log Panel", + border_style=Style(color=Color.BLUE.value), + ), + transient=True, + ): + while True: + self.refresh() + current_status = self.ai_recommendation_job_status + status.update(f"Current status: [bold]{current_status}") + + if current_status in terminal_states: + logger.info(f"Final Resource Status: [bold]{current_status}") + + if "failed" in current_status.lower(): + raise FailedStatusError( + resource_type="AIRecommendationJob", + status=current_status, + reason=self.failure_reason, + ) + + return + + if timeout is not None and time.time() - start_time >= timeout: + raise TimeoutExceededError( + resouce_type="AIRecommendationJob", status=current_status + ) + time.sleep(poll) + + @classmethod + @Base.add_validate_call + def get_all( + cls, + name_contains: Optional[str] = Unassigned(), + status_equals: Optional[str] = Unassigned(), + creation_time_after: Optional[datetime.datetime] = Unassigned(), + creation_time_before: Optional[datetime.datetime] = Unassigned(), + sort_by: Optional[str] = Unassigned(), + sort_order: Optional[str] = Unassigned(), + session: Optional[Session] = None, + region: Optional[str] = None, + ) -> ResourceIterator["AIRecommendationJob"]: + """ + Get all AIRecommendationJob resources + + Parameters: + max_results: The maximum number of recommendation jobs to return in the response. + next_token: If the previous call to ListAIRecommendationJobs didn't return the full set of jobs, the call returns a token for getting the next set. + name_contains: A string in the job name. This filter returns only jobs whose name contains the specified string. + status_equals: A filter that returns only recommendation jobs with the specified status. + creation_time_after: A filter that returns only jobs created after the specified time. + creation_time_before: A filter that returns only jobs created before the specified time. + sort_by: The field to sort results by. The default is CreationTime. + sort_order: The sort order for results. The default is Descending. + session: Boto3 session. + region: Region name. + + Returns: + Iterator for listed AIRecommendationJob resources. 
+ + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + """ + + client = Base.get_sagemaker_client( + session=session, region_name=region, service_name="sagemaker" + ) + + operation_input_args = { + "NameContains": name_contains, + "StatusEquals": status_equals, + "CreationTimeAfter": creation_time_after, + "CreationTimeBefore": creation_time_before, + "SortBy": sort_by, + "SortOrder": sort_order, + } + + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + return ResourceIterator( + client=client, + list_method="list_ai_recommendation_jobs", + summaries_key="AIRecommendationJobs", + summary_name="AIRecommendationJobSummary", + resource_cls=AIRecommendationJob, + list_method_kwargs=operation_input_args, + ) + + +class AIWorkloadConfig(Base): + """ + Class representing resource AIWorkloadConfig + + Attributes: + ai_workload_config_name: The name of the AI workload configuration. + ai_workload_config_arn: The Amazon Resource Name (ARN) of the AI workload configuration. + creation_time: A timestamp that indicates when the AI workload configuration was created. + dataset_config: The dataset configuration for the workload. + ai_workload_configs: The benchmark tool configuration and workload specification. + tags: The tags associated with the AI workload configuration. + + """ + + ai_workload_config_name: str + ai_workload_config_arn: Optional[str] = Unassigned() + dataset_config: Optional[shapes.AIDatasetConfig] = Unassigned() + ai_workload_configs: Optional[shapes.AIWorkloadConfigs] = Unassigned() + tags: Optional[List[shapes.Tag]] = Unassigned() + creation_time: Optional[datetime.datetime] = Unassigned() + + def get_name(self) -> str: + attributes = vars(self) + resource_name = "ai_workload_config_name" + resource_name_split = resource_name.split("_") + attribute_name_candidates = [] + + l = len(resource_name_split) + for i in range(0, l): + attribute_name_candidates.append("_".join(resource_name_split[i:l])) + + for attribute, value in attributes.items(): + if attribute == "name" or attribute in attribute_name_candidates: + return value + logger.error("Name attribute not found for object ai_workload_config") + return None + + @classmethod + @Base.add_validate_call + def create( + cls, + ai_workload_config_name: str, + dataset_config: Optional[shapes.AIDatasetConfig] = Unassigned(), + ai_workload_configs: Optional[shapes.AIWorkloadConfigs] = Unassigned(), + tags: Optional[List[shapes.Tag]] = Unassigned(), + session: Optional[Session] = None, + region: Optional[str] = None, + ) -> Optional["AIWorkloadConfig"]: + """ + Create a AIWorkloadConfig resource + + Parameters: + ai_workload_config_name: The name of the AI workload configuration. The name must be unique within your Amazon Web Services account in the current Amazon Web Services Region. + dataset_config: The dataset configuration for the workload. Specify input data channels with their data sources for benchmark workloads. + ai_workload_configs: The benchmark tool configuration and workload specification. Provide the specification as an inline YAML or JSON string. 
+ tags: The metadata that you apply to Amazon Web Services resources to help you categorize and organize them. Each tag consists of a key and a value, both of which you define. For more information, see Tagging Amazon Web Services Resources in the Amazon Web Services General Reference. + session: Boto3 session. + region: Region name. + + Returns: + The AIWorkloadConfig resource. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceInUse: Resource being accessed is in use. + ResourceLimitExceeded: You have exceeded an SageMaker resource limit. For example, you might have too many training jobs created. + ConfigSchemaValidationError: Raised when a configuration file does not adhere to the schema + LocalConfigNotFoundError: Raised when a configuration file is not found in local file system + S3ConfigNotFoundError: Raised when a configuration file is not found in S3 + """ + + logger.info("Creating ai_workload_config resource.") + client = Base.get_sagemaker_client( + session=session, region_name=region, service_name="sagemaker" + ) + + operation_input_args = { + "AIWorkloadConfigName": ai_workload_config_name, + "DatasetConfig": dataset_config, + "AIWorkloadConfigs": ai_workload_configs, + "Tags": tags, + } + + operation_input_args = Base.populate_chained_attributes( + resource_name="AIWorkloadConfig", operation_input_args=operation_input_args + ) + + logger.debug(f"Input request: {operation_input_args}") + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + # create the resource + response = client.create_ai_workload_config(**operation_input_args) + logger.debug(f"Response: {response}") + + return cls.get( + ai_workload_config_name=ai_workload_config_name, session=session, region=region + ) + + @classmethod + @Base.add_validate_call + def get( + cls, + ai_workload_config_name: str, + session: Optional[Session] = None, + region: Optional[str] = None, + ) -> Optional["AIWorkloadConfig"]: + """ + Get a AIWorkloadConfig resource + + Parameters: + ai_workload_config_name: The name of the AI workload configuration to describe. + session: Boto3 session. + region: Region name. + + Returns: + The AIWorkloadConfig resource. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. 
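A hedged sketch of creating and then inspecting a reusable workload configuration. The inline spec body is purely illustrative: the patch only says it is an opaque inline YAML or JSON string, not a documented schema; names are placeholders:
```
from sagemaker_core.main import shapes
from sagemaker_core.main.resources import AIWorkloadConfig

# The benchmark spec is passed through as an opaque string; the key shown
# here is an illustrative assumption, not a documented field.
cfg = AIWorkloadConfig.create(
    ai_workload_config_name="my-workload-config",
    ai_workload_configs=shapes.AIWorkloadConfigs(
        workload_spec=shapes.WorkloadSpec(inline='{"max_concurrency": 8}')
    ),
)
print(cfg.ai_workload_config_arn)
```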
+ """ + + operation_input_args = { + "AIWorkloadConfigName": ai_workload_config_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client = Base.get_sagemaker_client( + session=session, region_name=region, service_name="sagemaker" + ) + response = client.describe_ai_workload_config(**operation_input_args) + + logger.debug(response) + + # deserialize the response + transformed_response = transform(response, "DescribeAIWorkloadConfigResponse") + ai_workload_config = cls(**transformed_response) + return ai_workload_config + + @Base.add_validate_call + def refresh( + self, + ) -> Optional["AIWorkloadConfig"]: + """ + Refresh a AIWorkloadConfig resource + + Returns: + The AIWorkloadConfig resource. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceNotFound: Resource being access is not found. + """ + + operation_input_args = { + "AIWorkloadConfigName": self.ai_workload_config_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client = Base.get_sagemaker_client() + response = client.describe_ai_workload_config(**operation_input_args) + + # deserialize response and update self + transform(response, "DescribeAIWorkloadConfigResponse", self) + return self + + @Base.add_validate_call + def delete( + self, + ) -> None: + """ + Delete a AIWorkloadConfig resource + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + ResourceInUse: Resource being accessed is in use. + ResourceNotFound: Resource being access is not found. + """ + + client = Base.get_sagemaker_client() + + operation_input_args = { + "AIWorkloadConfigName": self.ai_workload_config_name, + } + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + client.delete_ai_workload_config(**operation_input_args) + + logger.info(f"Deleting {self.__class__.__name__} - {self.get_name()}") + + @classmethod + @Base.add_validate_call + def get_all( + cls, + name_contains: Optional[str] = Unassigned(), + creation_time_after: Optional[datetime.datetime] = Unassigned(), + creation_time_before: Optional[datetime.datetime] = Unassigned(), + sort_by: Optional[str] = Unassigned(), + sort_order: Optional[str] = Unassigned(), + session: Optional[Session] = None, + region: Optional[str] = None, + ) -> ResourceIterator["AIWorkloadConfig"]: + """ + Get all AIWorkloadConfig resources + + Parameters: + max_results: The maximum number of AI workload configurations to return in the response. + next_token: If the previous call to ListAIWorkloadConfigs didn't return the full set of configurations, the call returns a token for getting the next set of configurations. 
+ name_contains: A string in the configuration name. This filter returns only configurations whose name contains the specified string. + creation_time_after: A filter that returns only configurations created after the specified time. + creation_time_before: A filter that returns only configurations created before the specified time. + sort_by: The field to sort results by. The default is CreationTime. + sort_order: The sort order for results. The default is Descending. + session: Boto3 session. + region: Region name. + + Returns: + Iterator for listed AIWorkloadConfig resources. + + Raises: + botocore.exceptions.ClientError: This exception is raised for AWS service related errors. + The error message and error code can be parsed from the exception as follows: + ``` + try: + # AWS service call here + except botocore.exceptions.ClientError as e: + error_message = e.response['Error']['Message'] + error_code = e.response['Error']['Code'] + ``` + """ + + client = Base.get_sagemaker_client( + session=session, region_name=region, service_name="sagemaker" + ) + + operation_input_args = { + "NameContains": name_contains, + "CreationTimeAfter": creation_time_after, + "CreationTimeBefore": creation_time_before, + "SortBy": sort_by, + "SortOrder": sort_order, + } + + # serialize the input request + operation_input_args = serialize(operation_input_args) + logger.debug(f"Serialized input request: {operation_input_args}") + + return ResourceIterator( + client=client, + list_method="list_ai_workload_configs", + summaries_key="AIWorkloadConfigs", + summary_name="AIWorkloadConfigSummary", + resource_cls=AIWorkloadConfig, + list_method_kwargs=operation_input_args, + ) + + class Action(Base): """ Class representing resource Action @@ -15346,6 +16488,7 @@ class InferenceComponent(Base): variant_name: The name of the production variant that hosts the inference component. failure_reason: If the inference component status is Failed, the reason for the failure. specification: Details about the resources that are deployed with this inference component. + specifications: A list of specification summaries for the inference component, one per instance type. This parameter is populated when the inference component was created with multiple specifications. When this parameter is populated, the singular Specification parameter is not returned. runtime_config: Details about the runtime settings for the model that is deployed with the inference component. inference_component_status: The status of the inference component. last_deployment_config: The deployment and rollback settings that you assigned to the inference component. 
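The new Specifications parameter documented above replaces the singular Specification when you want a different resource configuration per instance type. A minimal sketch under that reading; the two specification objects are assumed to have been built elsewhere, since the fields of InferenceComponentSpecification are defined outside this patch, and the component and endpoint names are placeholders:
```
from sagemaker_core.main.resources import InferenceComponent

# spec_for_type_a / spec_for_type_b: shapes.InferenceComponentSpecification
# instances prepared elsewhere, one per instance type. Per the docs above,
# pass either `specification` or `specifications`, never both.
ic = InferenceComponent.create(
    inference_component_name="my-inference-component",
    endpoint_name="my-endpoint",
    specifications=[spec_for_type_a, spec_for_type_b],
)
```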
@@ -15359,6 +16502,7 @@ class InferenceComponent(Base): variant_name: Optional[str] = Unassigned() failure_reason: Optional[str] = Unassigned() specification: Optional[shapes.InferenceComponentSpecificationSummary] = Unassigned() + specifications: Optional[List[shapes.InferenceComponentSpecificationSummary]] = Unassigned() runtime_config: Optional[shapes.InferenceComponentRuntimeConfigSummary] = Unassigned() creation_time: Optional[datetime.datetime] = Unassigned() last_modified_time: Optional[datetime.datetime] = Unassigned() @@ -15389,6 +16533,7 @@ def create( endpoint_name: Union[str, object], variant_name: Optional[str] = Unassigned(), specification: Optional[shapes.InferenceComponentSpecification] = Unassigned(), + specifications: Optional[List[shapes.InferenceComponentSpecification]] = Unassigned(), runtime_config: Optional[shapes.InferenceComponentRuntimeConfig] = Unassigned(), tags: Optional[List[shapes.Tag]] = Unassigned(), session: Optional[Session] = None, @@ -15402,6 +16547,7 @@ def create( endpoint_name: The name of an existing endpoint where you host the inference component. variant_name: The name of an existing production variant where you host the inference component. specification: Details about the resources to deploy with this inference component, including the model, container, and compute resources. + specifications: A list of specification objects for the inference component, one per instance type. Use this parameter when you want to deploy a different model or resource configuration for the inference component on each instance type. You can use either this parameter or the singular Specification parameter, but not both. runtime_config: Runtime settings for a model that is deployed with an inference component. tags: A list of key-value pairs associated with the model. For more information, see Tagging Amazon Web Services resources in the Amazon Web Services General Reference. session: Boto3 session. @@ -15436,6 +16582,7 @@ def create( "EndpointName": endpoint_name, "VariantName": variant_name, "Specification": specification, + "Specifications": specifications, "RuntimeConfig": runtime_config, "Tags": tags, } @@ -15547,6 +16694,7 @@ def refresh( def update( self, specification: Optional[shapes.InferenceComponentSpecification] = Unassigned(), + specifications: Optional[List[shapes.InferenceComponentSpecification]] = Unassigned(), runtime_config: Optional[shapes.InferenceComponentRuntimeConfig] = Unassigned(), deployment_config: Optional[shapes.InferenceComponentDeploymentConfig] = Unassigned(), ) -> Optional["InferenceComponent"]: @@ -15578,6 +16726,7 @@ def update( operation_input_args = { "InferenceComponentName": self.inference_component_name, "Specification": specification, + "Specifications": specifications, "RuntimeConfig": runtime_config, "DeploymentConfig": deployment_config, } @@ -20758,6 +21907,7 @@ class ModelPackage(Base): security_config: The KMS Key ID (KMSKeyId) used for encryption of model package information. model_card: The model card associated with the model package. Since ModelPackageModelCard is tied to a model package, it is a specific usage of a model card and its schema is simplified compared to the schema of ModelCard. The ModelPackageModelCard schema does not include model_package_details, and model_overview is composed of the model_creator and model_artifact properties. For more information about the model package model card schema, see Model package model card schema. 
For more information about the model card associated with the model package, see View the Details of a Model Version. model_life_cycle: A structure describing the current state of the model in its life cycle. + managed_storage_type: The storage type of the model package. """ @@ -20794,6 +21944,7 @@ class ModelPackage(Base): security_config: Optional[shapes.ModelPackageSecurityConfig] = Unassigned() model_card: Optional[shapes.ModelPackageModelCard] = Unassigned() model_life_cycle: Optional[shapes.ModelLifeCycle] = Unassigned() + managed_storage_type: Optional[str] = Unassigned() def get_name(self) -> str: attributes = vars(self) @@ -20897,6 +22048,7 @@ def create( security_config: Optional[shapes.ModelPackageSecurityConfig] = Unassigned(), model_card: Optional[shapes.ModelPackageModelCard] = Unassigned(), model_life_cycle: Optional[shapes.ModelLifeCycle] = Unassigned(), + managed_storage_type: Optional[str] = Unassigned(), session: Optional[Session] = None, region: Optional[str] = None, ) -> Optional["ModelPackage"]: @@ -20928,6 +22080,7 @@ def create( security_config: The KMS Key ID (KMSKeyId) used for encryption of model package information. model_card: The model card associated with the model package. Since ModelPackageModelCard is tied to a model package, it is a specific usage of a model card and its schema is simplified compared to the schema of ModelCard. The ModelPackageModelCard schema does not include model_package_details, and model_overview is composed of the model_creator and model_artifact properties. For more information about the model package model card schema, see Model package model card schema. For more information about the model card associated with the model package, see View the Details of a Model Version. model_life_cycle: A structure describing the current state of the model in its life cycle. + managed_storage_type: The storage type of the model package. session: Boto3 session. region: Region name. @@ -20981,6 +22134,7 @@ def create( "SecurityConfig": security_config, "ModelCard": model_card, "ModelLifeCycle": model_life_cycle, + "ManagedStorageType": managed_storage_type, } operation_input_args = Base.populate_chained_attributes( @@ -21444,6 +22598,7 @@ class ModelPackageGroup(Base): created_by: model_package_group_status: The status of the model group. model_package_group_description: A description of the model group. + managed_configuration: The managed configuration of the model package group. """ @@ -21453,6 +22608,7 @@ class ModelPackageGroup(Base): creation_time: Optional[datetime.datetime] = Unassigned() created_by: Optional[shapes.UserContext] = Unassigned() model_package_group_status: Optional[str] = Unassigned() + managed_configuration: Optional[shapes.ManagedConfiguration] = Unassigned() def get_name(self) -> str: attributes = vars(self) @@ -21477,6 +22633,7 @@ def create( model_package_group_name: str, model_package_group_description: Optional[str] = Unassigned(), tags: Optional[List[shapes.Tag]] = Unassigned(), + managed_configuration: Optional[shapes.ManagedConfiguration] = Unassigned(), session: Optional[Session] = None, region: Optional[str] = None, ) -> Optional["ModelPackageGroup"]: @@ -21487,6 +22644,7 @@ def create( model_package_group_name: The name of the model group. model_package_group_description: A description for the model group. tags: A list of key value pairs associated with the model group. For more information, see Tagging Amazon Web Services resources in the Amazon Web Services General Reference Guide. 
+ managed_configuration: The managed configuration of the model package group. session: Boto3 session. region: Region name. @@ -21518,6 +22676,7 @@ def create( "ModelPackageGroupName": model_package_group_name, "ModelPackageGroupDescription": model_package_group_description, "Tags": tags, + "ManagedConfiguration": managed_configuration, } operation_input_args = Base.populate_chained_attributes( @@ -29852,7 +31011,7 @@ class TrainingPlan(Base): unhealthy_instance_count: The number of instances in the training plan that are currently in an unhealthy state. available_spare_instance_count: The number of available spare instances in the training plan. total_ultra_server_count: The total number of UltraServers reserved to this training plan. - target_resources: The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints) that can use this training plan. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group. A training plan for SageMaker endpoints can be used exclusively to provide compute resources to SageMaker endpoints for model deployment. + target_resources: The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints, Studio apps) that can use this training plan. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group. A training plan for SageMaker endpoints can be used exclusively to provide compute resources to SageMaker endpoints for model deployment. A training plan for Studio apps can be used to launch JupyterLab and Code Editor apps on reserved training plan capacity. reserved_capacity_summaries: The list of Reserved Capacity providing the underlying compute resources of the plan. """ diff --git a/src/sagemaker_core/main/shapes.py b/src/sagemaker_core/main/shapes.py index 168be4b..04b46f1 100644 --- a/src/sagemaker_core/main/shapes.py +++ b/src/sagemaker_core/main/shapes.py @@ -452,6 +452,515 @@ class RawMetricData(Base): step: Optional[int] = Unassigned() +class AIBenchmarkInferenceComponent(Base): + """ + AIBenchmarkInferenceComponent + An inference component to benchmark. + + Attributes + ---------------------- + identifier: The name or Amazon Resource Name (ARN) of the inference component. + """ + + identifier: str + + +class AIBenchmarkEndpoint(Base): + """ + AIBenchmarkEndpoint + The SageMaker endpoint configuration for benchmarking. + + Attributes + ---------------------- + identifier: The name or Amazon Resource Name (ARN) of the SageMaker endpoint to benchmark. + target_container_hostname: The hostname of the specific container to target within a multi-container endpoint. + inference_components: The list of inference components to benchmark on the endpoint. + """ + + identifier: str + target_container_hostname: Optional[str] = Unassigned() + inference_components: Optional[List[AIBenchmarkInferenceComponent]] = Unassigned() + + +class AIBenchmarkJobSummary(Base): + """ + AIBenchmarkJobSummary + Summary information about an AI benchmark job. + + Attributes + ---------------------- + ai_benchmark_job_name: The name of the benchmark job. 
+ ai_benchmark_job_arn: The Amazon Resource Name (ARN) of the benchmark job. + ai_benchmark_job_status: The status of the benchmark job. + creation_time: A timestamp that indicates when the benchmark job was created. + end_time: A timestamp that indicates when the benchmark job completed. + ai_workload_config_name: The name of the AI workload configuration used by the benchmark job. + """ + + ai_benchmark_job_name: str + ai_benchmark_job_arn: str + ai_benchmark_job_status: str + creation_time: datetime.datetime + end_time: Optional[datetime.datetime] = Unassigned() + ai_workload_config_name: Optional[str] = Unassigned() + + +class VpcConfig(Base): + """ + VpcConfig + Specifies an Amazon Virtual Private Cloud (VPC) that your SageMaker jobs, hosted models, and compute resources have access to. You can control access to and from your resources by configuring a VPC. For more information, see Give SageMaker Access to Resources in your Amazon VPC. + + Attributes + ---------------------- + security_group_ids: The VPC security group IDs, in the form sg-xxxxxxxx. Specify the security groups for the VPC that is specified in the Subnets field. + subnets: The ID of the subnets in the VPC to which you want to connect your training job or model. For information about the availability of specific instance types, see Supported Instance Types and Availability Zones. + """ + + security_group_ids: List[str] + subnets: List[str] + + +class AIBenchmarkNetworkConfig(Base): + """ + AIBenchmarkNetworkConfig + The network configuration for an AI benchmark job. + + Attributes + ---------------------- + vpc_config: The VPC configuration, including security group IDs and subnet IDs. + """ + + vpc_config: Optional[VpcConfig] = Unassigned() + + +class AIBenchmarkOutputConfig(Base): + """ + AIBenchmarkOutputConfig + The output configuration for an AI benchmark job. + + Attributes + ---------------------- + s3_output_location: The Amazon S3 URI where benchmark results are stored. + """ + + s3_output_location: str + + +class AICloudWatchLogs(Base): + """ + AICloudWatchLogs + CloudWatch log information for an AI benchmark or recommendation job. + + Attributes + ---------------------- + log_group_arn: The Amazon Resource Name (ARN) of the CloudWatch log group. + log_stream_name: The name of the CloudWatch log stream. + """ + + log_group_arn: Optional[str] = Unassigned() + log_stream_name: Optional[str] = Unassigned() + + +class AIBenchmarkOutputResult(Base): + """ + AIBenchmarkOutputResult + The output result of an AI benchmark job, including the Amazon S3 location and CloudWatch log information. + + Attributes + ---------------------- + s3_output_location: The Amazon S3 URI where benchmark results are stored. + cloud_watch_logs: The CloudWatch log information for the benchmark job. + """ + + s3_output_location: str + cloud_watch_logs: Optional[List[AICloudWatchLogs]] = Unassigned() + + +class AIBenchmarkTarget(Base): + """ + AIBenchmarkTarget + The target for an AI benchmark job. This is a union type — specify one of the members. + + Attributes + ---------------------- + endpoint: The SageMaker endpoint to benchmark. + """ + + endpoint: Optional[AIBenchmarkEndpoint] = Unassigned() + + +class AICapacityReservationConfig(Base): + """ + AICapacityReservationConfig + The capacity reservation configuration for an AI recommendation job. + + Attributes + ---------------------- + capacity_reservation_preference: The capacity reservation preference. The only valid value is capacity-reservations-only. 
+ ml_reservation_arns: The list of ML reservation ARNs to use. + """ + + capacity_reservation_preference: Optional[str] = Unassigned() + ml_reservation_arns: Optional[List[str]] = Unassigned() + + +class AIWorkloadS3DataSource(Base): + """ + AIWorkloadS3DataSource + The Amazon S3 data source for an AI workload. + + Attributes + ---------------------- + s3_uri: The Amazon S3 URI of the data. + """ + + s3_uri: str + + +class AIWorkloadDataSource(Base): + """ + AIWorkloadDataSource + The data source for an AI workload input data channel. + + Attributes + ---------------------- + s3_data_source: The Amazon S3 data source configuration. + """ + + s3_data_source: Optional[AIWorkloadS3DataSource] = Unassigned() + + +class AIWorkloadInputDataConfig(Base): + """ + AIWorkloadInputDataConfig + A channel of input data for an AI workload configuration. Each channel has a name and a data source. + + Attributes + ---------------------- + channel_name: The logical name for the data channel. + data_source: The data source for this channel. + """ + + channel_name: str + data_source: AIWorkloadDataSource + + +class AIDatasetConfig(Base): + """ + AIDatasetConfig + The dataset configuration for an AI workload. This is a union type — specify one of the members. + + Attributes + ---------------------- + input_data_config: An array of input data channel configurations for the workload. + """ + + input_data_config: Optional[List[AIWorkloadInputDataConfig]] = Unassigned() + + +class AIModelSourceS3(Base): + """ + AIModelSourceS3 + The Amazon S3 model source configuration. + + Attributes + ---------------------- + s3_uri: The Amazon S3 URI of the model artifacts. + """ + + s3_uri: Optional[str] = Unassigned() + + +class AIModelSource(Base): + """ + AIModelSource + The source of the model for an AI recommendation job. This is a union type. + + Attributes + ---------------------- + s3: The Amazon S3 location of the model artifacts. + """ + + s3: Optional[AIModelSourceS3] = Unassigned() + + +class AIRecommendationOptimizationDetail(Base): + """ + AIRecommendationOptimizationDetail + Details about an optimization technique applied in a recommendation. + + Attributes + ---------------------- + optimization_type: The type of optimization. Valid values are SpeculativeDecoding and KernelTuning. + optimization_config: A map of configuration parameters for the optimization technique. + """ + + optimization_type: str + optimization_config: Optional[Dict[str, str]] = Unassigned() + + +class AIRecommendationInstanceDetail(Base): + """ + AIRecommendationInstanceDetail + Instance details for a recommendation. + + Attributes + ---------------------- + instance_type: The recommended instance type. + instance_count: The recommended number of instances. + copy_count_per_instance: The number of model copies per instance. + """ + + instance_type: Optional[str] = Unassigned() + instance_count: Optional[int] = Unassigned() + copy_count_per_instance: Optional[int] = Unassigned() + + +class AIRecommendationModelDetails(Base): + """ + AIRecommendationModelDetails + Details about the model package in a recommendation. + + Attributes + ---------------------- + model_package_arn: The Amazon Resource Name (ARN) of the model package. + inference_specification_name: The name of the inference specification within the model package. + instance_details: The instance details for this recommendation, including instance type, count, and model copies per instance. 
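A reading sketch for the recommendation shapes above, pulling instance sizing out of a finished recommendation job; the job name is a placeholder and the truthiness guards assume unset optional fields are falsy:
```
from sagemaker_core.main.resources import AIRecommendationJob

job = AIRecommendationJob.get(ai_recommendation_job_name="my-recommendation-job")
for rec in job.recommendations or []:
    details = rec.model_details
    if not details or not details.instance_details:
        continue
    for d in details.instance_details:
        # Fields per the AIRecommendationInstanceDetail shape above.
        print(d.instance_type, d.instance_count, d.copy_count_per_instance)
```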
+ """ + + model_package_arn: Optional[str] = Unassigned() + inference_specification_name: Optional[str] = Unassigned() + instance_details: Optional[List[AIRecommendationInstanceDetail]] = Unassigned() + + +class AIRecommendationDeploymentS3Channel(Base): + """ + AIRecommendationDeploymentS3Channel + An Amazon S3 data channel for a recommended deployment configuration, containing model artifacts or optimized model outputs. + + Attributes + ---------------------- + channel_name: A custom name for this Amazon S3 data channel. + uri: The Amazon S3 URI of the data for this channel. + """ + + channel_name: Optional[str] = Unassigned() + uri: Optional[str] = Unassigned() + + +class AIRecommendationDeploymentConfiguration(Base): + """ + AIRecommendationDeploymentConfiguration + The deployment configuration for a recommendation. + + Attributes + ---------------------- + s3: The Amazon S3 data channels for the deployment. + image_uri: The URI of the container image for the deployment. + instance_type: The recommended instance type for the deployment. + instance_count: The recommended number of instances for the deployment. + copy_count_per_instance: The number of model copies per instance. + environment_variables: The environment variables for the deployment. + """ + + s3: Optional[List[AIRecommendationDeploymentS3Channel]] = Unassigned() + image_uri: Optional[str] = Unassigned() + instance_type: Optional[str] = Unassigned() + instance_count: Optional[int] = Unassigned() + copy_count_per_instance: Optional[int] = Unassigned() + environment_variables: Optional[Dict[str, str]] = Unassigned() + + +class AIRecommendationPerformanceMetric(Base): + """ + AIRecommendationPerformanceMetric + An expected performance metric for a recommendation. + + Attributes + ---------------------- + metric: The name of the performance metric. + stat: The statistical measure for the metric. + value: The value of the metric. + unit: The unit of the metric value. + """ + + metric: str + value: str + stat: Optional[str] = Unassigned() + unit: Optional[str] = Unassigned() + + +class AIRecommendation(Base): + """ + AIRecommendation + An optimization recommendation generated by an AI recommendation job. + + Attributes + ---------------------- + recommendation_description: A description of the recommendation. + optimization_details: The optimization techniques applied in this recommendation. + model_details: Details about the model package associated with this recommendation. + deployment_configuration: The deployment configuration for this recommendation, including the container image, instance type, instance count, and environment variables. + ai_benchmark_job_arn: The Amazon Resource Name (ARN) of the benchmark job associated with this recommendation. + expected_performance: The expected performance metrics for this recommendation. + """ + + recommendation_description: Optional[str] = Unassigned() + optimization_details: Optional[List[AIRecommendationOptimizationDetail]] = Unassigned() + model_details: Optional[AIRecommendationModelDetails] = Unassigned() + deployment_configuration: Optional[AIRecommendationDeploymentConfiguration] = Unassigned() + ai_benchmark_job_arn: Optional[str] = Unassigned() + expected_performance: Optional[List[AIRecommendationPerformanceMetric]] = Unassigned() + + +class AIRecommendationComputeSpec(Base): + """ + AIRecommendationComputeSpec + The compute resource specification for an AI recommendation job. 
+ + Attributes + ---------------------- + instance_types: The list of instance types to consider for recommendations. You can specify up to 3 instance types. + capacity_reservation_config: The capacity reservation configuration. + """ + + instance_types: Optional[List[str]] = Unassigned() + capacity_reservation_config: Optional[AICapacityReservationConfig] = Unassigned() + + +class AIRecommendationConstraint(Base): + """ + AIRecommendationConstraint + A performance constraint for an AI recommendation job. + + Attributes + ---------------------- + metric: The performance metric. Valid values are ttft-ms (time to first token in milliseconds), throughput, and cost. + """ + + metric: str + + +class AIRecommendationInferenceSpecification(Base): + """ + AIRecommendationInferenceSpecification + The inference framework for an AI recommendation job. + + Attributes + ---------------------- + framework: The inference framework. Valid values are LMI and VLLM. + """ + + framework: Optional[str] = Unassigned() + + +class AIRecommendationJobSummary(Base): + """ + AIRecommendationJobSummary + Summary information about an AI recommendation job. + + Attributes + ---------------------- + ai_recommendation_job_name: The name of the recommendation job. + ai_recommendation_job_arn: The Amazon Resource Name (ARN) of the recommendation job. + ai_recommendation_job_status: The status of the recommendation job. + creation_time: A timestamp that indicates when the recommendation job was created. + end_time: A timestamp that indicates when the recommendation job completed. + """ + + ai_recommendation_job_name: str + ai_recommendation_job_arn: str + ai_recommendation_job_status: str + creation_time: datetime.datetime + end_time: Optional[datetime.datetime] = Unassigned() + + +class AIRecommendationOutputConfig(Base): + """ + AIRecommendationOutputConfig + The output configuration for an AI recommendation job. + + Attributes + ---------------------- + s3_output_location: The Amazon S3 URI where recommendation results are stored. + model_package_group_identifier: The name or Amazon Resource Name (ARN) of the model package group where the optimized model is registered as a new model package version. + """ + + s3_output_location: Optional[str] = Unassigned() + model_package_group_identifier: Optional[str] = Unassigned() + + +class AIRecommendationOutputResult(Base): + """ + AIRecommendationOutputResult + The output configuration for an AI recommendation job, including the S3 location for results and the model package group for deployment. + + Attributes + ---------------------- + s3_output_location: The Amazon S3 URI where the recommendation job writes its output results. + model_package_group_identifier: The name or Amazon Resource Name (ARN) of the model package group where deployment-ready model packages are registered. + """ + + s3_output_location: str + model_package_group_identifier: Optional[str] = Unassigned() + + +class AIRecommendationPerformanceTarget(Base): + """ + AIRecommendationPerformanceTarget + The performance targets for an AI recommendation job. + + Attributes + ---------------------- + constraints: An array of performance constraints that define the optimization objectives. + """ + + constraints: List[AIRecommendationConstraint] + + +class AIWorkloadConfigSummary(Base): + """ + AIWorkloadConfigSummary + Summary information about an AI workload configuration. + + Attributes + ---------------------- + ai_workload_config_name: The name of the AI workload configuration. 
+ ai_workload_config_arn: The Amazon Resource Name (ARN) of the AI workload configuration. + creation_time: A timestamp that indicates when the configuration was created. + """ + + ai_workload_config_name: str + ai_workload_config_arn: str + creation_time: datetime.datetime + + +class WorkloadSpec(Base): + """ + WorkloadSpec + The workload specification for benchmark tool configuration. Provide an inline YAML or JSON string. + + Attributes + ---------------------- + inline: An inline YAML or JSON string that defines benchmark parameters. + """ + + inline: Optional[str] = Unassigned() + + +class AIWorkloadConfigs(Base): + """ + AIWorkloadConfigs + The benchmark tool configuration for an AI workload. + + Attributes + ---------------------- + workload_spec: The workload specification that defines benchmark parameters. + """ + + workload_spec: WorkloadSpec + + class AcceleratorPartitionConfig(Base): """ AcceleratorPartitionConfig @@ -635,6 +1144,21 @@ class ModelInput(Base): data_input_config: str +class AdditionalModelDataSource(Base): + """ + AdditionalModelDataSource + Data sources that are available to your model in addition to the one that you specify for ModelDataSource when you use the CreateModel action. + + Attributes + ---------------------- + channel_name: A custom name for this AdditionalModelDataSource object. + s3_data_source + """ + + channel_name: str + s3_data_source: S3ModelDataSource + + class AdditionalS3DataSource(Base): """ AdditionalS3DataSource @@ -689,6 +1213,7 @@ class ModelPackageContainerDefinition(Base): framework: The machine learning framework of the model package container image. framework_version: The framework version of the Model Package Container Image. nearest_model_name: The name of a pre-trained machine learning benchmarked by Amazon SageMaker Inference Recommender model that matches your model. You can find a list of benchmarked models by calling ListModelMetadata. + additional_model_data_sources: Data sources that are available to your model in addition to the one that you specify for ModelDataSource when you use the CreateModelPackage action. additional_s3_data_source: The additional data source that is used during inference in the Docker container for your model package. model_data_e_tag: The ETag associated with Model Data URL. is_checkpoint: Specifies whether the model data is a training checkpoint. @@ -706,6 +1231,7 @@ class ModelPackageContainerDefinition(Base): framework: Optional[str] = Unassigned() framework_version: Optional[str] = Unassigned() nearest_model_name: Optional[str] = Unassigned() + additional_model_data_sources: Optional[List[AdditionalModelDataSource]] = Unassigned() additional_s3_data_source: Optional[AdditionalS3DataSource] = Unassigned() model_data_e_tag: Optional[str] = Unassigned() is_checkpoint: Optional[bool] = Unassigned() @@ -737,21 +1263,6 @@ class AdditionalInferenceSpecificationDefinition(Base): supported_response_mime_types: Optional[List[str]] = Unassigned() -class AdditionalModelDataSource(Base): - """ - AdditionalModelDataSource - Data sources that are available to your model in addition to the one that you specify for ModelDataSource when you use the CreateModel action. - - Attributes - ---------------------- - channel_name: A custom name for this AdditionalModelDataSource object. 
- s3_data_source - """ - - channel_name: str - s3_data_source: S3ModelDataSource - - class AgentVersion(Base): """ AgentVersion @@ -1354,6 +1865,7 @@ class ResourceSpec(Base): sage_maker_image_version_alias: The SageMakerImageVersionAlias of the image to launch with. This value is in SemVer 2.0.0 versioning format. instance_type: The instance type that the image version runs on. JupyterServer apps only support the system value. For KernelGateway apps, the system value is translated to ml.t3.medium. KernelGateway apps also support all other values for available instance types. lifecycle_config_arn: The Amazon Resource Name (ARN) of the Lifecycle Configuration attached to the Resource. + training_plan_arn: The ARN of the SageMaker AI Training Plan to use for this app. When you specify a training plan, the app launches on reserved GPU capacity. This field is supported for JupyterLab and CodeEditor app types. For more information about how to reserve GPU capacity with SageMaker AI Training Plans, see Using training plans in Studio applications. """ sage_maker_image_arn: Optional[str] = Unassigned() @@ -1361,6 +1873,7 @@ class ResourceSpec(Base): sage_maker_image_version_alias: Optional[str] = Unassigned() instance_type: Optional[str] = Unassigned() lifecycle_config_arn: Optional[str] = Unassigned() + training_plan_arn: Optional[str] = Unassigned() class AppDetails(Base): @@ -2107,21 +2620,6 @@ class AutoMLJobCompletionCriteria(Base): max_auto_ml_job_runtime_in_seconds: Optional[int] = Unassigned() -class VpcConfig(Base): - """ - VpcConfig - Specifies an Amazon Virtual Private Cloud (VPC) that your SageMaker jobs, hosted models, and compute resources have access to. You can control access to and from your resources by configuring a VPC. For more information, see Give SageMaker Access to Resources in your Amazon VPC. - - Attributes - ---------------------- - security_group_ids: The VPC security group IDs, in the form sg-xxxxxxxx. Specify the security groups for the VPC that is specified in the Subnets field. - subnets: The ID of the subnets in the VPC to which you want to connect your training job or model. For information about the availability of specific instance types, see Supported Instance Types and Availability Zones. - """ - - security_group_ids: List[str] - subnets: List[str] - - class AutoMLSecurityConfig(Base): """ AutoMLSecurityConfig @@ -3766,6 +4264,21 @@ class InstanceGroupScalingMetadata(Base): failure_message: Optional[str] = Unassigned() +class InstanceRequirementsEniConfiguration(Base): + """ + InstanceRequirementsEniConfiguration + The customer ENI and additional ENIs associated with a network interface category. + + Attributes + ---------------------- + customer_eni: The ID of the customer-managed Elastic Network Interface (ENI) associated with the instance type category. + additional_enis: Information about additional Elastic Network Interfaces (ENIs) associated with the instance type category. + """ + + customer_eni: Optional[str] = Unassigned() + additional_enis: Optional[AdditionalEnis] = Unassigned() + + class InstanceMetadata(Base): """ InstanceMetadata @@ -3775,6 +4288,7 @@ class InstanceMetadata(Base): ---------------------- customer_eni: The ID of the customer-managed Elastic Network Interface (ENI) associated with the instance. additional_enis: Information about additional Elastic Network Interfaces (ENIs) associated with the instance. 
+ instance_requirements_eni_configurations: The ENI configurations for the instance types in the instance requirements, grouped by network interface category (for example, ENI-only or EFA with ENIs). At most one configuration per category. capacity_reservation: Information about the Capacity Reservation used by the instance. failure_message: An error message describing why the instance creation or update failed, if applicable. lcs_execution_state: The execution state of the Lifecycle Script (LCS) for the instance. @@ -3783,6 +4297,9 @@ class InstanceMetadata(Base): customer_eni: Optional[str] = Unassigned() additional_enis: Optional[AdditionalEnis] = Unassigned() + instance_requirements_eni_configurations: Optional[ + List[InstanceRequirementsEniConfiguration] + ] = Unassigned() capacity_reservation: Optional[CapacityReservation] = Unassigned() failure_message: Optional[str] = Unassigned() lcs_execution_state: Optional[str] = Unassigned() @@ -3837,6 +4354,7 @@ class ClusterEventDetail(Base): event_time: The timestamp when the event occurred. event_details: Additional details about the event, including event-specific metadata. description: A human-readable description of the event. + event_level: The severity level of the event. Valid values are Info, Warn, and Error. """ event_id: str @@ -3848,6 +4366,7 @@ class ClusterEventDetail(Base): instance_id: Optional[str] = Unassigned() event_details: Optional[EventDetails] = Unassigned() description: Optional[str] = Unassigned() + event_level: Optional[str] = Unassigned() class ClusterEventSummary(Base): @@ -3865,6 +4384,7 @@ class ClusterEventSummary(Base): resource_type: The type of resource associated with the event. Valid values are Cluster, InstanceGroup, or Instance. event_time: The timestamp when the event occurred. description: A brief, human-readable description of the event. + event_level: The severity level of the event. Valid values are Info, Warn, and Error. """ event_id: str @@ -3875,6 +4395,7 @@ class ClusterEventSummary(Base): instance_group_name: Optional[str] = Unassigned() instance_id: Optional[str] = Unassigned() description: Optional[str] = Unassigned() + event_level: Optional[str] = Unassigned() class ClusterFsxLustreConfig(Base): @@ -4112,6 +4633,7 @@ class ClusterInstanceGroupDetails(Base): scheduled_update_config: The configuration object of the schedule that SageMaker follows when updating the AMI. current_image_id: The ID of the Amazon Machine Image (AMI) currently in use by the instance group. desired_image_id: The ID of the Amazon Machine Image (AMI) desired for the instance group. + image_version_status: The status of the image version for the instance group. Indicates whether the instance group is running the latest image version or if an update is available. active_operations: A map indicating active operations currently in progress for the instance group of a SageMaker HyperPod cluster. When there is a scaling operation in progress, this map contains a key Scaling with value 1. kubernetes_config: The Kubernetes configuration for the instance group that contains labels and taints to be applied for the nodes in this instance group. capacity_requirements: The instance capacity requirements for the instance group. 
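The new AI workload shapes defined earlier in this file compose into the pieces of a workload configuration. A minimal sketch, assuming keyword construction as with the other shapes in this module; the bucket path and the inline spec keys are illustrative, not defined by the API:

    from sagemaker_core.main.shapes import (
        AIDatasetConfig,
        AIWorkloadConfigs,
        AIWorkloadDataSource,
        AIWorkloadInputDataConfig,
        AIWorkloadS3DataSource,
        WorkloadSpec,
    )

    # One input channel backed by Amazon S3. AIWorkloadDataSource is a
    # union-style shape, so only its s3_data_source member is set.
    dataset = AIDatasetConfig(
        input_data_config=[
            AIWorkloadInputDataConfig(
                channel_name="prompts",
                data_source=AIWorkloadDataSource(
                    s3_data_source=AIWorkloadS3DataSource(
                        s3_uri="s3://amzn-s3-demo-bucket/benchmark/prompts/"  # illustrative
                    )
                ),
            )
        ]
    )

    # Benchmark tool settings are supplied as an inline YAML or JSON string;
    # the keys below are placeholders for whatever the benchmark tool accepts.
    workload = AIWorkloadConfigs(
        workload_spec=WorkloadSpec(inline='{"concurrency": 8, "duration_seconds": 300}')
    )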
@@ -4141,6 +4663,7 @@ class ClusterInstanceGroupDetails(Base): scheduled_update_config: Optional[ScheduledUpdateConfig] = Unassigned() current_image_id: Optional[str] = Unassigned() desired_image_id: Optional[str] = Unassigned() + image_version_status: Optional[str] = Unassigned() active_operations: Optional[Dict[str, int]] = Unassigned() kubernetes_config: Optional[ClusterKubernetesConfigDetails] = Unassigned() capacity_requirements: Optional[ClusterCapacityRequirements] = Unassigned() @@ -4342,6 +4865,7 @@ class ClusterNodeDetails(Base): placement: The placement details of the SageMaker HyperPod cluster node. current_image_id: The ID of the Amazon Machine Image (AMI) currently in use by the node. desired_image_id: The ID of the Amazon Machine Image (AMI) desired for the node. + image_version_status: The status of the image version for the cluster node. ultra_server_info: Contains information about the UltraServer. kubernetes_config: The Kubernetes configuration applied to this node, showing both the current and desired state of labels and taints. The cluster works to reconcile the actual state with the declared state. capacity_type: The capacity type of the node. Valid values are OnDemand and Spot. When set to OnDemand, the node is launched as an On-Demand instance. When set to Spot, the node is launched as a Spot instance. @@ -4365,6 +4889,7 @@ class ClusterNodeDetails(Base): placement: Optional[ClusterInstancePlacement] = Unassigned() current_image_id: Optional[str] = Unassigned() desired_image_id: Optional[str] = Unassigned() + image_version_status: Optional[str] = Unassigned() ultra_server_info: Optional[UltraServerInfo] = Unassigned() kubernetes_config: Optional[ClusterKubernetesConfigNodeDetails] = Unassigned() capacity_type: Optional[str] = Unassigned() @@ -4387,6 +4912,7 @@ class ClusterNodeSummary(Base): instance_status: The status of the instance. ultra_server_info: Contains information about the UltraServer. private_dns_hostname: The private DNS hostname of the SageMaker HyperPod cluster node. + image_version_status: The status of the image version for the cluster node. """ instance_group_name: str @@ -4398,6 +4924,7 @@ class ClusterNodeSummary(Base): last_software_update_time: Optional[datetime.datetime] = Unassigned() ultra_server_info: Optional[UltraServerInfo] = Unassigned() private_dns_hostname: Optional[str] = Unassigned() + image_version_status: Optional[str] = Unassigned() class ClusterOrchestratorEksConfig(Base): @@ -4547,13 +5074,13 @@ class ClusterRestrictedInstanceGroupSpecification(Base): instance_group_name: str instance_type: str execution_role: str - environment_config: EnvironmentConfig threads_per_core: Optional[int] = Unassigned() instance_storage_configs: Optional[List[ClusterInstanceStorageConfig]] = Unassigned() on_start_deep_health_checks: Optional[List[str]] = Unassigned() training_plan_arn: Optional[str] = Unassigned() override_vpc_config: Optional[VpcConfig] = Unassigned() scheduled_update_config: Optional[ScheduledUpdateConfig] = Unassigned() + environment_config: Optional[EnvironmentConfig] = Unassigned() class ClusterSchedulerConfigSummary(Base): @@ -5858,12 +6385,14 @@ class StudioWebPortalSettings(Base): hidden_app_types: The Applications supported in Studio that are hidden from the Studio left navigation pane. hidden_instance_types: The instance types you are hiding from the Studio user interface. hidden_sage_maker_image_version_aliases: The version aliases you are hiding from the Studio user interface. 
+ execution_role_session_name_mode: The execution role session name mode. If this value is set to USER_IDENTITY, the session name of the execution role corresponds to the user's identity. For IAM domains, the session name is the IAM session name used to generate the presigned URL. For IAM Identity Center domains, the session name is the username of the associated IAM Identity Center user. If this value is set to STATIC or is not set, the session name defaults to SageMaker. """ hidden_ml_tools: Optional[List[str]] = Unassigned() hidden_app_types: Optional[List[str]] = Unassigned() hidden_instance_types: Optional[List[str]] = Unassigned() hidden_sage_maker_image_version_aliases: Optional[List[HiddenSageMakerImage]] = Unassigned() + execution_role_session_name_mode: Optional[str] = Unassigned() class UserSettings(Base): @@ -6109,6 +6638,23 @@ class DeploymentStage(Base): deployment_config: Optional[EdgeDeploymentConfig] = Unassigned() +class InstancePool(Base): + """ + InstancePool + Specifies an instance type and its priority for a heterogeneous endpoint. Use instance pools to configure a production variant with multiple instance types, enabling the endpoint to provision instances across different types based on priority. + + Attributes + ---------------------- + instance_type: The ML compute instance type for the instance pool. + model_name_override: The name of a SageMaker model to use for this instance pool instead of the model specified for the production variant. Use this to deploy a different model optimized for the instance type in this pool. + priority: The priority for the instance pool. SageMaker attempts to provision instances in order of priority, starting with the lowest value. If instances for a higher-priority pool are unavailable, SageMaker attempts to provision from the next pool. Valid values: 1 to 5, where 1 is the highest priority. + """ + + instance_type: str + priority: int + model_name_override: Optional[str] = Unassigned() + + class ProductionVariantCoreDumpConfig(Base): """ ProductionVariantCoreDumpConfig @@ -6216,6 +6762,8 @@ class ProductionVariant(Base): model_name: The name of the model that you want to host. This is the name that you specified when creating the model. initial_instance_count: Number of instances to launch initially. instance_type: The ML compute instance type. + instance_pools: A list of instance pools for the production variant. Each instance pool specifies an instance type and its priority for provisioning. Use instance pools to configure heterogeneous endpoints that deploy models across multiple instance types. + variant_instance_provision_timeout_in_seconds: The timeout value, in seconds, for provisioning instances for the production variant. When SageMaker encounters an insufficient capacity error while provisioning instances, it retries with the next instance pool (if configured) or waits until the timeout expires. This timeout applies only to capacity provisioning and does not include the time for model download or container startup. Valid values: 300 to 3600. initial_variant_weight: Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. The traffic to a production variant is determined by the ratio of the VariantWeight to the sum of all VariantWeight values across all ProductionVariants. If unspecified, it defaults to 1.0. accelerator_type: This parameter is no longer supported. Elastic Inference (EI) is no longer available. 
This parameter was used to specify the size of the EI instance to use for the production variant. core_dump_config: Specifies configuration for a core dump from the model container when the process crashes. @@ -6234,6 +6782,8 @@ class ProductionVariant(Base): model_name: Optional[Union[str, object]] = Unassigned() initial_instance_count: Optional[int] = Unassigned() instance_type: Optional[str] = Unassigned() + instance_pools: Optional[List[InstancePool]] = Unassigned() + variant_instance_provision_timeout_in_seconds: Optional[int] = Unassigned() initial_variant_weight: Optional[float] = Unassigned() accelerator_type: Optional[str] = Unassigned() core_dump_config: Optional[ProductionVariantCoreDumpConfig] = Unassigned() @@ -6291,8 +6841,8 @@ class MetricsConfig(Base): Attributes ---------------------- - enable_enhanced_metrics: Specifies whether to enable enhanced metrics for the endpoint. Enhanced metrics provide utilization data at instance and container granularity. Container granularity is supported for Inference Components. The default is False. - metric_publish_frequency_in_seconds: The frequency, in seconds, at which utilization metrics are published to Amazon CloudWatch. The default is 60 seconds. + enable_enhanced_metrics: Specifies whether to enable enhanced metrics for the endpoint. Enhanced metrics provide utilization and invocation data at instance and container granularity. Container granularity is supported for Inference Components. The default is False. + metric_publish_frequency_in_seconds: The interval, in seconds, at which metrics are published to Amazon CloudWatch. Defaults to 60. Valid values: 10, 30, 60, 120, 180, 240, 300. When EnableEnhancedMetrics is set to False, this interval applies to utilization metrics only; invocation metrics continue to be published at the default 60-second interval. When EnableEnhancedMetrics is set to True, this interval applies to both utilization and invocation metrics. """ enable_enhanced_metrics: Optional[bool] = Unassigned() @@ -6428,8 +6978,8 @@ class OfflineStoreConfig(Base): Attributes ---------------------- s3_storage_config: The Amazon Simple Storage (Amazon S3) location of OfflineStore. - disable_glue_table_creation: Set to True to disable the automatic creation of an Amazon Web Services Glue table when configuring an OfflineStore. If set to False, Feature Store will name the OfflineStore Glue table following Athena's naming recommendations. The default value is False. - data_catalog_config: The meta data of the Glue table that is autogenerated when an OfflineStore is created. + disable_glue_table_creation: Set to True to disable the automatic creation of an Amazon Web Services Glue table when configuring an OfflineStore. If set to True and DataCatalogConfig is provided, Feature Store associates the provided catalog configuration with the feature group without creating a table. In this case, you are responsible for creating and managing the Glue table. If set to True without DataCatalogConfig, no Glue table is created or associated with the feature group. The Iceberg table format is only supported when this is set to False. If set to False and DataCatalogConfig is provided, Feature Store creates the table using the specified names. If set to False without DataCatalogConfig, Feature Store auto-generates the table name following Athena's naming recommendations. This applies to both Glue and Apache Iceberg table formats. The default value is False. + data_catalog_config: The meta data of the Glue table for the OfflineStore. 
If not provided, Feature Store auto-generates the table name, database, and catalog when the OfflineStore is created. You can optionally provide this configuration to specify custom values. This applies to both Glue and Apache Iceberg table formats. table_format: Format for the offline store table. Supported formats are Glue (Default) and Apache Iceberg. """ @@ -6987,6 +7537,7 @@ class InferenceComponentSpecification(Base): Attributes ---------------------- + instance_type: The ML compute instance type for the inference component specification. Specifies which instance type this specification applies to. Required when using the Specifications parameter with multiple entries. model_name: The name of an existing SageMaker AI model object in your account that you want to deploy with the inference component. container: Defines a container that provides the runtime environment for a model that you deploy with an inference component. startup_parameters: Settings that take effect while the model container starts up. @@ -6996,6 +7547,7 @@ class InferenceComponentSpecification(Base): scheduling_config: The scheduling configuration that determines how inference component copies are placed across available instances when copies are added or removed. """ + instance_type: Optional[str] = Unassigned() model_name: Optional[Union[str, object]] = Unassigned() container: Optional[InferenceComponentContainerSpecification] = Unassigned() startup_parameters: Optional[InferenceComponentStartupParameters] = Unassigned() @@ -7723,6 +8275,19 @@ class InferenceExecutionConfig(Base): mode: str +class ManagedConfiguration(Base): + """ + ManagedConfiguration + The managed configuration of a model package group. + + Attributes + ---------------------- + managed_storage_type: The storage type of the model package. + """ + + managed_storage_type: Optional[str] = Unassigned() + + class ModelPackageValidationProfile(Base): """ ModelPackageValidationProfile @@ -9620,6 +10185,21 @@ class EdgePresetDeploymentOutput(Base): status_message: Optional[str] = Unassigned() +class InstancePoolSummary(Base): + """ + InstancePoolSummary + A summary of an instance pool for a production variant, including the instance type and the current number of instances. + + Attributes + ---------------------- + instance_type: The ML compute instance type for the instance pool. + current_instance_count: The current number of instances of this type in the instance pool. + """ + + instance_type: str + current_instance_count: int + + class ProductionVariantStatus(Base): """ ProductionVariantStatus @@ -9692,6 +10272,7 @@ class ProductionVariantSummary(Base): desired_weight: The requested weight, as specified in the UpdateEndpointWeightsAndCapacities request. current_instance_count: The number of instances associated with the variant. desired_instance_count: The number of instances requested in the UpdateEndpointWeightsAndCapacities request. + instance_pools: A list of instance pools for the production variant. Each pool indicates the instance type and the current number of instances of that type. variant_status: The endpoint variant status which describes the current deployment stage status or operational status. current_serverless_config: The serverless configuration for the endpoint. desired_serverless_config: The serverless configuration requested for the endpoint update. 
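Instance pools turn a production variant into a heterogeneous deployment. A minimal sketch of the new fields, again assuming the usual keyword construction; the model names are placeholders:

    from sagemaker_core.main.shapes import InstancePool, ProductionVariant

    # SageMaker provisions from the priority-1 pool first and falls back to
    # the priority-2 pool on insufficient capacity, retrying until the
    # provisioning timeout below expires.
    variant = ProductionVariant(
        variant_name="AllTraffic",
        model_name="my-llm-model",  # placeholder model name
        initial_instance_count=2,
        instance_pools=[
            InstancePool(instance_type="ml.g5.2xlarge", priority=1),
            InstancePool(
                instance_type="ml.g6.2xlarge",
                priority=2,
                # Optionally serve a build of the model optimized for this type.
                model_name_override="my-llm-model-g6",  # placeholder
            ),
        ],
        variant_instance_provision_timeout_in_seconds=600,  # valid range: 300-3600
    )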
@@ -9706,6 +10287,7 @@ class ProductionVariantSummary(Base): desired_weight: Optional[float] = Unassigned() current_instance_count: Optional[int] = Unassigned() desired_instance_count: Optional[int] = Unassigned() + instance_pools: Optional[List[InstancePoolSummary]] = Unassigned() variant_status: Optional[List[ProductionVariantStatus]] = Unassigned() current_serverless_config: Optional[ProductionVariantServerlessConfig] = Unassigned() desired_serverless_config: Optional[ProductionVariantServerlessConfig] = Unassigned() @@ -9730,6 +10312,7 @@ class PendingProductionVariantSummary(Base): current_instance_count: The number of instances associated with the variant. desired_instance_count: The number of instances requested in this deployment, as specified in the endpoint configuration for the endpoint. The value is taken from the request to the CreateEndpointConfig operation. instance_type: The type of instances associated with the variant. + instance_pools: A list of instance pools for the production variant. Each pool indicates the instance type and the current number of instances of that type. accelerator_type: This parameter is no longer supported. Elastic Inference (EI) is no longer available. This parameter was used to specify the size of the EI instance to use for the production variant. variant_status: The endpoint variant status which describes the current deployment stage status or operational status. current_serverless_config: The serverless configuration for the endpoint. @@ -9745,6 +10328,7 @@ class PendingProductionVariantSummary(Base): current_instance_count: Optional[int] = Unassigned() desired_instance_count: Optional[int] = Unassigned() instance_type: Optional[str] = Unassigned() + instance_pools: Optional[List[InstancePoolSummary]] = Unassigned() accelerator_type: Optional[str] = Unassigned() variant_status: Optional[List[ProductionVariantStatus]] = Unassigned() current_serverless_config: Optional[ProductionVariantServerlessConfig] = Unassigned() @@ -10036,6 +10620,7 @@ class InferenceComponentSpecificationSummary(Base): Attributes ---------------------- + instance_type: The ML compute instance type associated with this inference component specification. model_name: The name of the SageMaker AI model object that is deployed with the inference component. container: Details about the container that provides the runtime environment for the model that is deployed with the inference component. startup_parameters: Settings that take effect while the model container starts up. @@ -10045,6 +10630,7 @@ class InferenceComponentSpecificationSummary(Base): scheduling_config: The scheduling configuration that determines how inference component copies are placed across available instances when copies are added or removed. """ + instance_type: Optional[str] = Unassigned() model_name: Optional[Union[str, object]] = Unassigned() container: Optional[InferenceComponentContainerSpecificationSummary] = Unassigned() startup_parameters: Optional[InferenceComponentStartupParameters] = Unassigned() @@ -10056,6 +10642,21 @@ class InferenceComponentSpecificationSummary(Base): scheduling_config: Optional[InferenceComponentSchedulingConfig] = Unassigned() +class InferenceComponentPlacementStatus(Base): + """ + InferenceComponentPlacementStatus + The placement status of an inference component on a specific instance type. Shows the number of inference component copies currently placed on instances of a given type. 
+ + Attributes + ---------------------- + instance_type: The ML compute instance type where the inference component copies are placed. + current_copy_count: The number of inference component copies currently placed on instances of this type. + """ + + instance_type: str + current_copy_count: int + + class InferenceComponentRuntimeConfigSummary(Base): """ InferenceComponentRuntimeConfigSummary @@ -10065,10 +10666,12 @@ class InferenceComponentRuntimeConfigSummary(Base): ---------------------- desired_copy_count: The number of runtime copies of the model container that you requested to deploy with the inference component. current_copy_count: The number of runtime copies of the model container that are currently deployed. + placement_status: The placement status of the inference component across instance types. Shows how the inference component copies are distributed across instance types. """ desired_copy_count: Optional[int] = Unassigned() current_copy_count: Optional[int] = Unassigned() + placement_status: Optional[List[InferenceComponentPlacementStatus]] = Unassigned() class InferenceComponentCapacitySize(Base): @@ -12508,6 +13111,7 @@ class ModelPackageGroupSummary(Base): model_package_group_description: A description of the model group. creation_time: The time that the model group was created. model_package_group_status: The status of the model group. + managed_configuration: The managed configuration of the model package group. """ model_package_group_name: Union[str, object] @@ -12515,6 +13119,7 @@ class ModelPackageGroupSummary(Base): creation_time: datetime.datetime model_package_group_status: str model_package_group_description: Optional[str] = Unassigned() + managed_configuration: Optional[ManagedConfiguration] = Unassigned() class ModelPackageSummary(Base): @@ -13291,7 +13896,7 @@ class TrainingPlanSummary(Base): available_instance_count: The number of instances currently available for use in this training plan. in_use_instance_count: The number of instances currently in use from this training plan. total_ultra_server_count: The total number of UltraServers allocated to this training plan. - target_resources: The target resources (e.g., training jobs, HyperPod clusters, Endpoints) that can use this training plan. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group. A training plan for SageMaker endpoints can be used exclusively to provide compute resources to SageMaker endpoints for model deployment. + target_resources: The target resources (e.g., training jobs, HyperPod clusters, Endpoints, Studio apps) that can use this training plan. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group. A training plan for SageMaker endpoints can be used exclusively to provide compute resources to SageMaker endpoints for model deployment. A training plan for Studio apps can be used to launch JupyterLab and Code Editor apps on reserved training plan capacity. reserved_capacity_summaries: A list of reserved capacities associated with this training plan, including details such as instance types, counts, and availability zones. 
""" @@ -14256,6 +14861,7 @@ class TrainingJob(Base): input_data_config: An array of Channel objects that describes each data input channel. Your input must be in the same Amazon Web Services region as your training job. output_data_config: The S3 path where model artifacts that you configured when creating the job are stored. SageMaker creates subfolders for model artifacts. resource_config: Resources, including ML compute instances and ML storage volumes, that are configured for model training. + warm_pool_status: The status of the warm pool associated with the training job. vpc_config: A VpcConfig object that specifies the VPC that this training job has access to. For more information, see Protect Training Jobs by Using an Amazon Virtual Private Cloud. stopping_condition: Specifies a limit to how long a model training job can run. It also specifies how long a managed Spot training job has to complete. When the job reaches the time limit, SageMaker ends the training job. Use this API to cap model training costs. To stop a job, SageMaker sends the algorithm the SIGTERM signal, which delays job termination for 120 seconds. Algorithms can use this 120-second window to save the model artifacts, so the results of training are not lost. creation_time: A timestamp that indicates when the training job was created. @@ -14298,6 +14904,7 @@ class TrainingJob(Base): input_data_config: Optional[List[Channel]] = Unassigned() output_data_config: Optional[OutputDataConfig] = Unassigned() resource_config: Optional[ResourceConfig] = Unassigned() + warm_pool_status: Optional[WarmPoolStatus] = Unassigned() vpc_config: Optional[VpcConfig] = Unassigned() stopping_condition: Optional[StoppingCondition] = Unassigned() creation_time: Optional[datetime.datetime] = Unassigned() @@ -14550,7 +15157,7 @@ class TrainingPlanOffering(Base): Attributes ---------------------- training_plan_offering_id: The unique identifier for this training plan offering. - target_resources: The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints) for this training plan offering. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group. A training plan for SageMaker endpoints can be used exclusively to provide compute resources to SageMaker endpoints for model deployment. + target_resources: The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints, Studio apps) for this training plan offering. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group. A training plan for SageMaker endpoints can be used exclusively to provide compute resources to SageMaker endpoints for model deployment. A training plan for Studio apps can be used to launch JupyterLab and Code Editor apps on reserved training plan capacity. requested_start_time_after: The requested start time that the user specified when searching for the training plan offering. requested_end_time_before: The requested end time that the user specified when searching for the training plan offering. 
duration_hours: The number of whole hours in the total duration for this training plan offering. diff --git a/src/sagemaker_core/tools/api_coverage.json b/src/sagemaker_core/tools/api_coverage.json index 5a8993e..7155452 100644 --- a/src/sagemaker_core/tools/api_coverage.json +++ b/src/sagemaker_core/tools/api_coverage.json @@ -1 +1 @@ -{"SupportedAPIs": 374, "UnsupportedAPIs": 17} \ No newline at end of file +{"SupportedAPIs": 388, "UnsupportedAPIs": 17} \ No newline at end of file
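Training plans now extend to Studio apps as a target resource. A minimal sketch of pinning an app to reserved capacity via the new ResourceSpec field; the ARN is illustrative:

    from sagemaker_core.main.shapes import ResourceSpec

    # Launch spec for a JupyterLab or Code Editor app on reserved
    # training-plan capacity (the app types that support this field).
    spec = ResourceSpec(
        instance_type="ml.g5.2xlarge",  # must be covered by the plan
        training_plan_arn=(
            "arn:aws:sagemaker:us-west-2:111122223333:training-plan/my-plan"  # illustrative
        ),
    )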
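Relatedly, the new placement-status shapes make the copy distribution of a heterogeneous endpoint observable. A minimal sketch of reading them back, with illustrative values such as a describe call might return:

    from sagemaker_core.main.shapes import (
        InferenceComponentPlacementStatus,
        InferenceComponentRuntimeConfigSummary,
    )

    # Four copies of an inference component spread across the two instance
    # types of a heterogeneous endpoint (values illustrative).
    runtime = InferenceComponentRuntimeConfigSummary(
        desired_copy_count=4,
        current_copy_count=4,
        placement_status=[
            InferenceComponentPlacementStatus(
                instance_type="ml.g5.2xlarge", current_copy_count=3
            ),
            InferenceComponentPlacementStatus(
                instance_type="ml.g6.2xlarge", current_copy_count=1
            ),
        ],
    )

    for placement in runtime.placement_status or []:
        print(f"{placement.instance_type}: {placement.current_copy_count} copies")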