From 1277943827ddf5a924c773fd256c5709403b374b Mon Sep 17 00:00:00 2001
From: Lucas Jia
Date: Wed, 13 May 2026 11:44:32 -0700
Subject: [PATCH 1/8] fix: unskip evaluator integ test classes in sm-train

---
 sagemaker-train/tests/integ/train/test_benchmark_evaluator.py | 2 +-
 .../tests/integ/train/test_custom_scorer_evaluator.py         | 2 +-
 .../tests/integ/train/test_llm_as_judge_evaluator.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
index 454fdd67c7..cb1882ff33 100644
--- a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
@@ -72,7 +72,7 @@
 }
 
 
-@pytest.mark.skip(reason="Temporarily skipped - moved from tests/integ/sagemaker/modules/evaluate/")
+# @pytest.mark.skip(reason="Temporarily skipped - moved from tests/integ/sagemaker/modules/evaluate/")
 class TestBenchmarkEvaluatorIntegration:
     """Integration tests for BenchmarkEvaluator with fine-tuned model package"""
 
diff --git a/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py b/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py
index 0af4ca1838..4f6af39303 100644
--- a/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py
@@ -55,7 +55,7 @@
 }
 
 
-@pytest.mark.skip(reason="Temporarily skipped - moved from tests/integ/sagemaker/modules/evaluate/")
+# @pytest.mark.skip(reason="Temporarily skipped - moved from tests/integ/sagemaker/modules/evaluate/")
 class TestCustomScorerEvaluatorIntegration:
     """Integration tests for CustomScorerEvaluator with custom evaluator"""
 
diff --git a/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py b/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py
index 49a68c22d9..a467450d51 100644
--- a/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py
@@ -84,7 +84,7 @@
 }
 
 
-@pytest.mark.skip(reason="Temporarily skipped - moved from tests/integ/sagemaker/modules/evaluate/")
+# @pytest.mark.skip(reason="Temporarily skipped - moved from tests/integ/sagemaker/modules/evaluate/")
 class TestLLMAsJudgeEvaluatorIntegration:
     """Integration tests for LLMAsJudgeEvaluator"""
 

From 107e5fbe291f132990ebf8ad48745340eb7fb7cd Mon Sep 17 00:00:00 2001
From: Lucas Jia
Date: Wed, 13 May 2026 14:38:10 -0700
Subject: [PATCH 2/8] debug: unskip all sm-train integ tests

---
 sagemaker-train/tests/integ/train/test_benchmark_evaluator.py | 4 ++--
 .../tests/integ/train/test_custom_scorer_evaluator.py         | 4 ++--
 .../tests/integ/train/test_llm_as_judge_evaluator.py          | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
index cb1882ff33..619971001e 100644
--- a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
@@ -286,7 +286,7 @@ def test_benchmark_subtasks_validation(self):
 
         logger.info("Subtask validation tests passed")
 
-    @pytest.mark.skip(reason="Base model only evaluation - to be enabled when needed")
+    # @pytest.mark.skip(reason="Base model only evaluation - to be enabled when needed")
     def test_benchmark_evaluation_base_model_only(self):
         """
         Test benchmark evaluation with base model only (no fine-tuned model).
@@ -339,7 +339,7 @@ def test_benchmark_evaluation_base_model_only(self):
         assert execution.status.overall_status == "Succeeded"
         logger.info("Base model only evaluation completed successfully")
 
-    @pytest.mark.skip(reason="Nova model evaluation - to be enabled when needed")
+    # @pytest.mark.skip(reason="Nova model evaluation - to be enabled when needed")
     def test_benchmark_evaluation_nova_model(self):
         """
         Test benchmark evaluation with Nova model.
diff --git a/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py b/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py
index 4f6af39303..56071f963a 100644
--- a/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py
@@ -233,7 +233,7 @@ def test_custom_scorer_evaluator_validation(self):
 
         logger.info("Validation tests passed")
 
-    @pytest.mark.skip(reason="Built-in metric evaluation - to be enabled when needed")
+    # @pytest.mark.skip(reason="Built-in metric evaluation - to be enabled when needed")
     def test_custom_scorer_with_builtin_metric(self):
         """
         Test custom scorer evaluation with built-in metric.
@@ -285,7 +285,7 @@ def test_custom_scorer_with_builtin_metric(self):
         assert execution.status.overall_status == "Succeeded"
         logger.info("Built-in metric evaluation completed successfully")
 
-    @pytest.mark.skip(reason="Base model only evaluation - not working yet per notebook")
+    # @pytest.mark.skip(reason="Base model only evaluation - not working yet per notebook")
     def test_custom_scorer_base_model_only(self):
         """
         Test custom scorer evaluation with base model only (no fine-tuned model).
diff --git a/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py b/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py
index a467450d51..03dc6f3ea9 100644
--- a/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py
@@ -254,7 +254,7 @@ def test_llm_as_judge_builtin_metrics_prefix_handling(self):
 
         logger.info("Built-in metrics prefix handling tests passed")
 
-    @pytest.mark.skip(reason="Built-in metrics only test - to be enabled when needed")
+    # @pytest.mark.skip(reason="Built-in metrics only test - to be enabled when needed")
     def test_llm_as_judge_builtin_metrics_only(self):
         """
         Test LLM-as-Judge evaluation with only built-in metrics (no custom metrics).
@@ -302,7 +302,7 @@ def test_llm_as_judge_builtin_metrics_only(self):
         assert execution.status.overall_status == "Succeeded"
         logger.info("Built-in metrics only evaluation completed successfully")
 
-    @pytest.mark.skip(reason="Custom metrics only test - to be enabled when needed")
+    # @pytest.mark.skip(reason="Custom metrics only test - to be enabled when needed")
     def test_llm_as_judge_custom_metrics_only(self):
         """
         Test LLM-as-Judge evaluation with only custom metrics (no built-in metrics).
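
[Reviewer note on patches 1-2] The two-step unskip works because pytest resolves skip markers at the class and the function level independently: patch 1 re-enables collection of the three test classes, but a per-test marker still skips its own test, which is why patch 2 has to comment those out one by one. A minimal sketch of the pattern being toggled here (class and test names are illustrative, not taken from the diffs):

    import pytest

    # Class-level marker: while active, every test in the class is skipped.
    # @pytest.mark.skip(reason="Temporarily skipped")
    class TestEvaluatorIntegration:

        # Function-level marker: applies on its own even after the
        # class-level marker above is commented out.
        # @pytest.mark.skip(reason="To be enabled when needed")
        def test_evaluation_runs(self):
            assert True
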
From f58d83683e63070a805cbdc296cf8de551909fc1 Mon Sep 17 00:00:00 2001
From: Lucas Jia
Date: Wed, 13 May 2026 15:03:45 -0700
Subject: [PATCH 3/8] test: replace redundant LLM-as-judge integ tests with
 unit tests for None metrics handling

---
 .../train/test_llm_as_judge_evaluator.py    | 94 -------------------
 .../evaluate/test_llm_as_judge_evaluator.py | 89 ++++++++++++++++++
 2 files changed, 89 insertions(+), 94 deletions(-)

diff --git a/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py b/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py
index 03dc6f3ea9..02b528bfa3 100644
--- a/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py
@@ -254,98 +254,4 @@ def test_llm_as_judge_builtin_metrics_prefix_handling(self):
 
         logger.info("Built-in metrics prefix handling tests passed")
 
-    # @pytest.mark.skip(reason="Built-in metrics only test - to be enabled when needed")
-    def test_llm_as_judge_builtin_metrics_only(self):
-        """
-        Test LLM-as-Judge evaluation with only built-in metrics (no custom metrics).
-
-        This test uses only built-in metrics without custom metrics.
-
-        Note: This test is currently skipped. Remove the @pytest.mark.skip decorator
-        when you want to enable it.
-        """
-        logger.info("Creating LLMAsJudgeEvaluator with built-in metrics only")
-
-        # Create evaluator with only built-in metrics
-        evaluator = LLMAsJudgeEvaluator(
-            model=TEST_CONFIG["model_package_arn"],
-            evaluator_model=TEST_CONFIG["evaluator_model"],
-            dataset=TEST_CONFIG["dataset_s3_uri"],
-            builtin_metrics=["Completeness", "Faithfulness", "Helpfulness"],
-            # mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
-            s3_output_path=TEST_CONFIG["s3_output_path"],
-            evaluate_base_model=False,
-        )
-
-        # Verify evaluator was created
-        assert evaluator is not None
-        assert evaluator.builtin_metrics == ["Completeness", "Faithfulness", "Helpfulness"]
-        assert evaluator.custom_metrics is None
-
-        logger.info("Created evaluator with built-in metrics only")
-
-        # Start evaluation
-        logger.info("Starting evaluation execution")
-        execution = evaluator.evaluate()
-
-        # Verify execution was created
-        assert execution is not None
-        assert execution.arn is not None
-
-        logger.info(f"Pipeline Execution ARN: {execution.arn}")
-
-        # Wait for completion
-        logger.info(f"Waiting for evaluation to complete (timeout: {EVALUATION_TIMEOUT_SECONDS}s / {EVALUATION_TIMEOUT_SECONDS//3600}h)")
-        execution.wait(target_status="Succeeded", poll=30, timeout=EVALUATION_TIMEOUT_SECONDS)
-
-        # Verify completion
-        assert execution.status.overall_status == "Succeeded"
-        logger.info("Built-in metrics only evaluation completed successfully")
-
-    # @pytest.mark.skip(reason="Custom metrics only test - to be enabled when needed")
-    def test_llm_as_judge_custom_metrics_only(self):
-        """
-        Test LLM-as-Judge evaluation with only custom metrics (no built-in metrics).
-
-        This test uses only custom metrics without built-in metrics.
-
-        Note: This test is currently skipped. Remove the @pytest.mark.skip decorator
-        when you want to enable it.
-        """
-        logger.info("Creating LLMAsJudgeEvaluator with custom metrics only")
-
-        # Create evaluator with only custom metrics
-        evaluator = LLMAsJudgeEvaluator(
-            model=TEST_CONFIG["model_package_arn"],
-            evaluator_model=TEST_CONFIG["evaluator_model"],
-            dataset=TEST_CONFIG["dataset_s3_uri"],
-            custom_metrics=TEST_CONFIG["custom_metrics_json"],
-            # mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
-            s3_output_path=TEST_CONFIG["s3_output_path"],
-            evaluate_base_model=False,
-        )
-
-        # Verify evaluator was created
-        assert evaluator is not None
-        assert evaluator.custom_metrics == TEST_CONFIG["custom_metrics_json"]
-        assert evaluator.builtin_metrics is None
-
-        logger.info("Created evaluator with custom metrics only")
-
-        # Start evaluation
-        logger.info("Starting evaluation execution")
-        execution = evaluator.evaluate()
-
-        # Verify execution was created
-        assert execution is not None
-        assert execution.arn is not None
-
-        logger.info(f"Pipeline Execution ARN: {execution.arn}")
-
-        # Wait for completion
-        logger.info(f"Waiting for evaluation to complete (timeout: {EVALUATION_TIMEOUT_SECONDS}s / {EVALUATION_TIMEOUT_SECONDS//3600}h)")
-        execution.wait(target_status="Succeeded", poll=30, timeout=EVALUATION_TIMEOUT_SECONDS)
-
-        # Verify completion
-        assert execution.status.overall_status == "Succeeded"
-        logger.info("Custom metrics only evaluation completed successfully")
diff --git a/sagemaker-train/tests/unit/train/evaluate/test_llm_as_judge_evaluator.py b/sagemaker-train/tests/unit/train/evaluate/test_llm_as_judge_evaluator.py
index 60f89b6b69..4fddfbf7de 100644
--- a/sagemaker-train/tests/unit/train/evaluate/test_llm_as_judge_evaluator.py
+++ b/sagemaker-train/tests/unit/train/evaluate/test_llm_as_judge_evaluator.py
@@ -515,6 +515,95 @@ def test_llm_as_judge_evaluator_get_llmaj_template_additions_no_metrics(mock_art
     assert additions['custom_metrics'] is None
 
 
+@patch('sagemaker.train.common_utils.model_resolution._resolve_base_model')
+@patch('sagemaker.core.resources.Artifact')
+def test_llm_as_judge_evaluator_builtin_metrics_only_no_custom(mock_artifact, mock_resolve):
+    """Test that evaluator handles builtin_metrics with custom_metrics=None correctly."""
+    mock_info = Mock()
+    mock_info.base_model_name = DEFAULT_MODEL
+    mock_info.base_model_arn = DEFAULT_BASE_MODEL_ARN
+    mock_info.source_model_package_arn = None
+    mock_resolve.return_value = mock_info
+
+    mock_artifact.get_all.return_value = iter([])
+    mock_artifact_instance = Mock()
+    mock_artifact_instance.artifact_arn = DEFAULT_ARTIFACT_ARN
+    mock_artifact.create.return_value = mock_artifact_instance
+
+    mock_session = Mock()
+    mock_session.boto_region_name = DEFAULT_REGION
+    mock_session.boto_session = Mock()
+    mock_session.get_caller_identity_arn.return_value = DEFAULT_ROLE
+
+    evaluator = LLMAsJudgeEvaluator(
+        evaluator_model=DEFAULT_EVALUATOR_MODEL,
+        dataset=DEFAULT_DATASET,
+        model=DEFAULT_MODEL,
+        builtin_metrics=["Completeness", "Faithfulness"],
+        custom_metrics=None,
+        s3_output_path=DEFAULT_S3_OUTPUT,
+        mlflow_resource_arn=DEFAULT_MLFLOW_ARN,
+        model_package_group=DEFAULT_MODEL_PACKAGE_GROUP_ARN,
+        sagemaker_session=mock_session,
+    )
+
+    assert evaluator.builtin_metrics == ["Completeness", "Faithfulness"]
+    assert evaluator.custom_metrics is None
+
+    eval_name = "test-eval"
+    additions = evaluator._get_llmaj_template_additions(eval_name)
+
+    assert additions['llmaj_metrics'] == json.dumps(["Completeness", "Faithfulness"])
+    assert additions['custom_metrics'] is None
+
+
+@patch('sagemaker.core.s3.client.S3Uploader.upload_string_as_file_body')
+@patch('sagemaker.train.common_utils.model_resolution._resolve_base_model')
+@patch('sagemaker.core.resources.Artifact')
+def test_llm_as_judge_evaluator_custom_metrics_only_no_builtin(mock_artifact, mock_resolve, mock_s3_upload):
+    """Test that evaluator handles custom_metrics with builtin_metrics=None correctly."""
+    mock_info = Mock()
+    mock_info.base_model_name = DEFAULT_MODEL
+    mock_info.base_model_arn = DEFAULT_BASE_MODEL_ARN
+    mock_info.source_model_package_arn = None
+    mock_resolve.return_value = mock_info
+
+    mock_artifact.get_all.return_value = iter([])
+    mock_artifact_instance = Mock()
+    mock_artifact_instance.artifact_arn = DEFAULT_ARTIFACT_ARN
+    mock_artifact.create.return_value = mock_artifact_instance
+
+    mock_session = Mock()
+    mock_session.boto_region_name = DEFAULT_REGION
+    mock_session.boto_session = Mock()
+    mock_session.get_caller_identity_arn.return_value = DEFAULT_ROLE
+
+    custom_metrics_json = json.dumps([{"customMetricDefinition": {"name": "TestMetric"}}])
+
+    evaluator = LLMAsJudgeEvaluator(
+        evaluator_model=DEFAULT_EVALUATOR_MODEL,
+        dataset=DEFAULT_DATASET,
+        model=DEFAULT_MODEL,
+        builtin_metrics=None,
+        custom_metrics=custom_metrics_json,
+        s3_output_path=DEFAULT_S3_OUTPUT,
+        mlflow_resource_arn=DEFAULT_MLFLOW_ARN,
+        model_package_group=DEFAULT_MODEL_PACKAGE_GROUP_ARN,
+        sagemaker_session=mock_session,
+    )
+
+    assert evaluator.builtin_metrics is None
+    assert evaluator.custom_metrics == custom_metrics_json
+
+    eval_name = "test-eval"
+    additions = evaluator._get_llmaj_template_additions(eval_name)
+
+    assert additions['llmaj_metrics'] == json.dumps([])
+    assert additions['custom_metrics'] is not None
+    assert additions['custom_metrics'].startswith("s3://")
+    mock_s3_upload.assert_called_once()
+
+
 @pytest.mark.skip(reason="Integration test - requires full pipeline execution setup")
 @patch('sagemaker.train.evaluate.execution.Pipeline')
 @patch('sagemaker.train.evaluate.llm_as_judge_evaluator.EvaluationPipelineExecution')

From b1c094e03870a9eb073fde23d50ffd73961466bf Mon Sep 17 00:00:00 2001
From: Lucas Jia
Date: Thu, 14 May 2026 00:54:56 -0700
Subject: [PATCH 4/8] mark TestCustomScorerEvaluatorIntegration tests as serial

---
 .../tests/integ/train/test_custom_scorer_evaluator.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py b/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py
index 56071f963a..54cec846dc 100644
--- a/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py
@@ -56,6 +56,7 @@
 
 
 # @pytest.mark.skip(reason="Temporarily skipped - moved from tests/integ/sagemaker/modules/evaluate/")
+@pytest.mark.xdist_group("custom_scorer_evaluator")
 class TestCustomScorerEvaluatorIntegration:
     """Integration tests for CustomScorerEvaluator with custom evaluator"""
 

From 3ce1324c84b256591ab5296fc4e4aaa6e68b5174 Mon Sep 17 00:00:00 2001
From: Lucas Jia
Date: Thu, 14 May 2026 01:05:34 -0700
Subject: [PATCH 5/8] update test config for test_benchmark_evaluation_base_model_only

---
 sagemaker-train/tests/integ/train/test_benchmark_evaluator.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
index 619971001e..4b8d9e63e8 100644
--- a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
@@ -52,12 +52,12 @@
     "region": "us-west-2",
 }
 
-# Base model only evaluation configuration (from commented section in notebook)
+# Base model only evaluation configuration
 BASE_MODEL_ONLY_CONFIG = {
     "base_model_id": "meta-textgeneration-llama-3-2-1b-instruct",
-    "dataset_s3_uri": "s3://sagemaker-us-west-2-052150106756/studio-users/d20251107t195443/datasets/2025-11-07T19-55-37-609Z/zc_test.jsonl",
-    "s3_output_path": "s3://mufi-test-serverless-smtj/eval/",
-    "mlflow_tracking_server_arn": "arn:aws:sagemaker:us-west-2:052150106756:mlflow-tracking-server/mmlu-eval-experiment",
+    "dataset_s3_uri": "s3://sagemaker-us-west-2-729646638167/model-customization/eval/zc_test.jsonl",
+    "s3_output_path": "s3://sagemaker-us-west-2-729646638167/model-customization/eval/",
+    "mlflow_tracking_server_arn": "arn:aws:sagemaker:us-west-2:729646638167:mlflow-app/app-W7FOBBXZANVX",
     "region": "us-west-2",
 }

From 82a8a1df21aacffd37411452e119c0e549615727 Mon Sep 17 00:00:00 2001
From: Lucas Jia
Date: Thu, 14 May 2026 01:06:17 -0700
Subject: [PATCH 6/8] unskip test_hp_contract_mpi_script

---
 sagemaker-train/tests/integ/train/test_model_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sagemaker-train/tests/integ/train/test_model_trainer.py b/sagemaker-train/tests/integ/train/test_model_trainer.py
index f37d009de8..8692103afe 100644
--- a/sagemaker-train/tests/integ/train/test_model_trainer.py
+++ b/sagemaker-train/tests/integ/train/test_model_trainer.py
@@ -96,7 +96,7 @@ def test_hp_contract_basic_sh_script(sagemaker_session):
 
 
 # skip this test for now as requirements.txt is not resolved
-@pytest.mark.skip
+# @pytest.mark.skip
 def test_hp_contract_mpi_script(sagemaker_session):
     compute = Compute(instance_type="ml.m5.xlarge", instance_count=2)
     model_trainer = ModelTrainer(

From ab553e6c9525b2addeb3537d884c2c44efedf9f9 Mon Sep 17 00:00:00 2001
From: Lucas Jia
Date: Thu, 14 May 2026 11:32:58 -0700
Subject: [PATCH 7/8] fix: comment out non-existent mlflow_resource_arn in
 base_model_only benchmark test

---
 sagemaker-train/tests/integ/train/test_benchmark_evaluator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
index 4b8d9e63e8..d11a57b254 100644
--- a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
@@ -307,7 +307,7 @@ def test_benchmark_evaluation_base_model_only(self):
             benchmark=Benchmark.MMLU,
             model=BASE_MODEL_ONLY_CONFIG["base_model_id"],
             s3_output_path=BASE_MODEL_ONLY_CONFIG["s3_output_path"],
-            mlflow_resource_arn=BASE_MODEL_ONLY_CONFIG["mlflow_tracking_server_arn"],
+            # mlflow_resource_arn=BASE_MODEL_ONLY_CONFIG["mlflow_tracking_server_arn"],
             base_eval_name="integ-test-base-model-only",
             # Note: model_package_group not needed for JumpStart models
         )

From cdf5ce707c1ad23fd75f46b7ae301aeaa2007d58 Mon Sep 17 00:00:00 2001
From: Lucas Jia
Date: Fri, 15 May 2026 10:36:20 -0700
Subject: [PATCH 8/8] fix: mark three unfixed tests as skipped, to fix them in
 another PR

---
 sagemaker-train/tests/integ/train/test_benchmark_evaluator.py | 4 ++--
 sagemaker-train/tests/integ/train/test_model_trainer.py       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
index d11a57b254..0db9b856d0 100644
--- a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
+++ b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py
@@ -286,7 +286,7 @@ def test_benchmark_subtasks_validation(self):
 
         logger.info("Subtask validation tests passed")
 
-    # @pytest.mark.skip(reason="Base model only evaluation - to be enabled when needed")
+    @pytest.mark.skip(reason="Pipeline creation fails - under investigation")
     def test_benchmark_evaluation_base_model_only(self):
         """
         Test benchmark evaluation with base model only (no fine-tuned model).
@@ -339,7 +339,7 @@ def test_benchmark_evaluation_base_model_only(self):
         assert execution.status.overall_status == "Succeeded"
         logger.info("Base model only evaluation completed successfully")
 
-    # @pytest.mark.skip(reason="Nova model evaluation - to be enabled when needed")
+    @pytest.mark.skip(reason="Requires us-east-1 test infrastructure - tracked in AI-5")
     def test_benchmark_evaluation_nova_model(self):
         """
         Test benchmark evaluation with Nova model.
diff --git a/sagemaker-train/tests/integ/train/test_model_trainer.py b/sagemaker-train/tests/integ/train/test_model_trainer.py
index 8692103afe..1589143112 100644
--- a/sagemaker-train/tests/integ/train/test_model_trainer.py
+++ b/sagemaker-train/tests/integ/train/test_model_trainer.py
@@ -96,7 +96,7 @@ def test_hp_contract_basic_sh_script(sagemaker_session):
 
 
 # skip this test for now as requirements.txt is not resolved
-# @pytest.mark.skip
+@pytest.mark.skip(reason="MPI distributed training does not resolve requirements.txt on worker nodes")
 def test_hp_contract_mpi_script(sagemaker_session):
     compute = Compute(instance_type="ml.m5.xlarge", instance_count=2)
     model_trainer = ModelTrainer(
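
[Reviewer note on patch 4] The xdist_group marker added there only serializes tests when the suite runs under pytest-xdist with --dist loadgroup; without that flag the marker has no effect. The diff passes the group name positionally; the keyword form below is the documented spelling. A minimal sketch of the mechanism, assuming the integ suite is invoked with xdist (test names are illustrative):

    import pytest

    @pytest.mark.xdist_group(name="custom_scorer_evaluator")
    class TestCustomScorerEvaluatorIntegration:
        # Under `pytest -n auto --dist loadgroup`, every test in this group
        # is scheduled onto the same worker, so the tests run serially with
        # respect to each other while unrelated tests still run in parallel.
        def test_scorer_variant_a(self):
            assert True

        def test_scorer_variant_b(self):
            assert True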