From a413f82e6e33ab585ce2f99b8444245b2a71f585 Mon Sep 17 00:00:00 2001 From: Lucas Jia Date: Tue, 16 Jun 2026 10:55:11 -0700 Subject: [PATCH 1/2] chore: Register 'serial' pytest marker in mlops tox.ini The Lake Formation integ tests use @pytest.mark.serial, but the marker was never registered, producing PytestUnknownMarkWarning noise on every run. Register it in the [pytest] markers section. --- sagemaker-mlops/tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/sagemaker-mlops/tox.ini b/sagemaker-mlops/tox.ini index 78f8cd8834..544038a6b5 100644 --- a/sagemaker-mlops/tox.ini +++ b/sagemaker-mlops/tox.ini @@ -64,6 +64,7 @@ markers = release image_uris_unit_test timeout: mark a test as a timeout. + serial: mark a test that must not run concurrently with others sharing the same resources. [testenv] setenv = From 0a9da4278304cf92b3beded84d485839e6b67bfd Mon Sep 17 00:00:00 2001 From: Lucas Jia Date: Tue, 16 Jun 2026 11:01:23 -0700 Subject: [PATCH 2/2] test: Xfail serve deploy integ test on InsufficientInstanceCapacity --- .../test_model_customization_deployment.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/sagemaker-serve/tests/integ/test_model_customization_deployment.py b/sagemaker-serve/tests/integ/test_model_customization_deployment.py index 3a9fc33058..b8cc3ad33d 100644 --- a/sagemaker-serve/tests/integ/test_model_customization_deployment.py +++ b/sagemaker-serve/tests/integ/test_model_customization_deployment.py @@ -115,6 +115,8 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu from sagemaker.serve import ModelBuilder import time + from sagemaker.core.utils.exceptions import FailedStatusError + training_job = TrainingJob.get(training_job_name=training_job_name, region=AWS_REGION) model_builder = ModelBuilder(model=training_job, instance_type="ml.g5.4xlarge", sagemaker_session=sagemaker_session) model_builder.accept_eula = True @@ -123,10 +125,21 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu peft_type = model_builder._fetch_peft() adapter_name = f"{endpoint_name}-adapter" - endpoint = model_builder.deploy( - endpoint_name=endpoint_name, - inference_component_name=adapter_name if peft_type == "LORA" else None, - ) + try: + endpoint = model_builder.deploy( + endpoint_name=endpoint_name, + inference_component_name=adapter_name if peft_type == "LORA" else None, + ) + except FailedStatusError as e: + # Endpoint provisioning can fail when the region is temporarily out of + # capacity for the requested instance type. This is an environmental + # condition unrelated to the SDK, so xfail rather than fail the build. + if "InsufficientInstanceCapacity" in str(e): + cleanup_endpoints.append(endpoint_name) + pytest.xfail( + f"InsufficientInstanceCapacity for ml.g5.4xlarge in {AWS_REGION}: {e}" + ) + raise cleanup_endpoints.append(endpoint_name)