diff --git a/.github/workflows/gpu-integ-tests.yml b/.github/workflows/gpu-integ-tests.yml index 07eacb29c6..a73d525491 100644 --- a/.github/workflows/gpu-integ-tests.yml +++ b/.github/workflows/gpu-integ-tests.yml @@ -79,13 +79,36 @@ jobs: project-name: sagemaker-python-sdk-ci-health-gpu-integ-tests source-version: refs/heads/master + # Bedrock model-import integ tests. Run serially (concurrency 1) in their own + # CodeBuild project because the "Concurrent model import jobs" Bedrock quota is + # fixed at 1 and not raisable; running them in parallel (as PR checks did) + # makes them collide and flake. us-west-2 only (no us_east_1-marked tests). + # Folded into the same run-level pass/fail metric as the GPU jobs below, so it + # shares the GpuIntegRunAlarm rather than getting a separate alarm. + import-model-integ-tests: + needs: check-prior-success + if: needs.check-prior-success.outputs.already_succeeded != 'true' + runs-on: ubuntu-latest + steps: + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.CI_AWS_ROLE_ARN }} + aws-region: us-west-2 + role-duration-seconds: 10800 + - name: Run Bedrock Model-Import Integ Tests + uses: aws-actions/aws-codebuild-run-build@v1 + with: + project-name: sagemaker-python-sdk-ci-health-import-model-integ-tests + source-version: refs/heads/master + - # Run-level result: a run is successful only if BOTH region jobs succeeded. + # Run-level result: a run is successful only if ALL three test jobs succeeded. # Emits GpuIntegRunFailure = 1 (failed) / 0 (succeeded) to CloudWatch in # us-west-2. The CDK alarm (GpuIntegRunAlarm) sums this over a UTC day and # cuts a daytime sev2 when all of the day's runs failed. Skipped when the gate # short-circuited today's run (an earlier run already succeeded). 
report-result: - needs: [check-prior-success, gpu-integ-tests, gpu-integ-tests-us-east-1] + needs: [check-prior-success, gpu-integ-tests, gpu-integ-tests-us-east-1, import-model-integ-tests] # Only emit the daily alarm metric for scheduled runs that actually executed # the test jobs: # - check-prior-success.result == 'success': if the gate job itself failed, @@ -111,12 +134,13 @@ jobs: exit 0 fi if [ "${{ needs.gpu-integ-tests.result }}" == "success" ] && \ - [ "${{ needs.gpu-integ-tests-us-east-1.result }}" == "success" ]; then + [ "${{ needs.gpu-integ-tests-us-east-1.result }}" == "success" ] && \ + [ "${{ needs.import-model-integ-tests.result }}" == "success" ]; then value=0 - echo "Both region jobs succeeded; emitting GpuIntegRunFailure=0" + echo "All region/import jobs succeeded; emitting GpuIntegRunFailure=0" else value=1 - echo "At least one region job did not succeed; emitting GpuIntegRunFailure=1" + echo "At least one region/import job did not succeed; emitting GpuIntegRunFailure=1" fi aws cloudwatch put-metric-data \ --namespace GpuIntegRunMetrics \ diff --git a/sagemaker-serve/tests/integ/test_bedrock_provisioned_throughput.py b/sagemaker-serve/tests/integ/test_bedrock_provisioned_throughput.py index fbdd39e9f6..2ee2b5e0ad 100644 --- a/sagemaker-serve/tests/integ/test_bedrock_provisioned_throughput.py +++ b/sagemaker-serve/tests/integ/test_bedrock_provisioned_throughput.py @@ -170,6 +170,7 @@ def _setup_model_files(s3_artifacts_uri, s3_client): @pytest.mark.serial +@pytest.mark.import_model class TestBedrockImportJobPolling: """Test import job polling for OSS models (Option C: deploy only waits for import).""" @@ -236,6 +237,7 @@ def test_deploy_oss_model_waits_for_import_completion( @pytest.mark.serial +@pytest.mark.import_model class TestBedrockProvisionedThroughput: """Test create_provisioned_throughput as a standalone method. 
diff --git a/sagemaker-serve/tests/integ/test_model_customization_deployment.py b/sagemaker-serve/tests/integ/test_model_customization_deployment.py index 3a9fc33058..b0d9482550 100644 --- a/sagemaker-serve/tests/integ/test_model_customization_deployment.py +++ b/sagemaker-serve/tests/integ/test_model_customization_deployment.py @@ -575,6 +575,7 @@ def test_bedrock_model_builder_creation(self, training_job): f"BedrockModelBuilder creation failed: {str(e)}. This might be due to sagemaker-core integration issues.") @pytest.mark.slow + @pytest.mark.import_model def test_bedrock_job_created(self, deployed_model_arn): """Test that Bedrock import job was created successfully.""" assert deployed_model_arn is not None @@ -583,6 +584,7 @@ def test_bedrock_job_created(self, deployed_model_arn): # Documentation recommends retries: https://docs.aws.amazon.com/bedrock/latest/userguide/invoke-imported-model.html#handle-model-not-ready-exception. # TODO: Fix using provisioned throughput or better wait mechanism @pytest.mark.slow + @pytest.mark.import_model def test_bedrock_model_invoke(self, deployed_model_arn, bedrock_runtime): logger.warning( "This test is known to be flaky due to 'model not ready' exceptions from Bedrock. " diff --git a/sagemaker-serve/tox.ini b/sagemaker-serve/tox.ini index 781cd7287a..19f112ad05 100644 --- a/sagemaker-serve/tox.ini +++ b/sagemaker-serve/tox.ini @@ -65,6 +65,7 @@ markers = timeout: mark a test as a timeout. gpu_intensive: mark a test as GPU resource intensive (runs on scheduled CI, not PR checks). us_east_1: mark a test that requires us-east-1 test account credentials (784379639078). + import_model: mark a test that creates a Bedrock model import job. Concurrent model import jobs are capped at 1 by a non-raisable Bedrock service quota, so these run serially in a dedicated scheduled CI run, not in PR checks. [testenv] setenv =