Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sagemaker-mlops/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ markers =
release
image_uris_unit_test
timeout: mark a test as a timeout.
serial: mark a test that must not run concurrently with others sharing the same resources.

[testenv]
setenv =
Expand Down
21 changes: 17 additions & 4 deletions sagemaker-serve/tests/integ/test_model_customization_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
from sagemaker.serve import ModelBuilder
import time

from sagemaker.core.utils.exceptions import FailedStatusError

training_job = TrainingJob.get(training_job_name=training_job_name, region=AWS_REGION)
model_builder = ModelBuilder(model=training_job, instance_type="ml.g5.4xlarge", sagemaker_session=sagemaker_session)
model_builder.accept_eula = True
Expand All @@ -123,10 +125,21 @@ def test_deploy_from_training_job(self, training_job_name, endpoint_name, cleanu
peft_type = model_builder._fetch_peft()
adapter_name = f"{endpoint_name}-adapter"

endpoint = model_builder.deploy(
endpoint_name=endpoint_name,
inference_component_name=adapter_name if peft_type == "LORA" else None,
)
try:
endpoint = model_builder.deploy(
endpoint_name=endpoint_name,
inference_component_name=adapter_name if peft_type == "LORA" else None,
)
except FailedStatusError as e:
# Endpoint provisioning can fail when the region is temporarily out of
# capacity for the requested instance type. This is an environmental
# condition unrelated to the SDK, so xfail rather than fail the build.
if "InsufficientInstanceCapacity" in str(e):
cleanup_endpoints.append(endpoint_name)
pytest.xfail(
f"InsufficientInstanceCapacity for ml.g5.4xlarge in {AWS_REGION}: {e}"
)
raise

cleanup_endpoints.append(endpoint_name)

Expand Down
Loading