diff --git a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py index 9cbfc13011..ad92523aec 100644 --- a/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py +++ b/sagemaker-train/tests/integ/train/test_benchmark_evaluator.py @@ -289,6 +289,7 @@ def test_benchmark_subtasks_validation(self): logger.info("Subtask validation tests passed") @pytest.mark.skip(reason="Pipeline creation fails - under investigation") + @pytest.mark.gpu_intensive def test_benchmark_evaluation_base_model_only(self): """ Test benchmark evaluation with base model only (no fine-tuned model). diff --git a/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py b/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py index a4e6044ed9..a71af868a7 100644 --- a/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py +++ b/sagemaker-train/tests/integ/train/test_custom_scorer_evaluator.py @@ -22,8 +22,6 @@ EvaluationPipelineExecution, ) -pytestmark = pytest.mark.gpu_intensive - # Configure logging logging.basicConfig( level=logging.INFO, @@ -237,6 +235,7 @@ def test_custom_scorer_evaluator_validation(self): logger.info("Validation tests passed") # @pytest.mark.skip(reason="Built-in metric evaluation - to be enabled when needed") + @pytest.mark.gpu_intensive def test_custom_scorer_with_builtin_metric(self): """ Test custom scorer evaluation with built-in metric. diff --git a/sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py b/sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py index 34a159a2d5..af673adea2 100644 --- a/sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py +++ b/sagemaker-train/tests/integ/train/test_dpo_trainer_integration.py @@ -21,9 +21,7 @@ from sagemaker.train.common import TrainingType import pytest -pytestmark = pytest.mark.gpu_intensive - - +@pytest.mark.gpu_intensive def test_dpo_trainer_lora_complete_workflow(sagemaker_session): """Test complete DPO training workflow with LORA.""" unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}" @@ -64,6 +62,7 @@ def test_dpo_trainer_lora_complete_workflow(sagemaker_session): assert training_job.output_model_package_arn is not None +@pytest.mark.gpu_intensive def test_dpo_trainer_with_validation_dataset(sagemaker_session): """Test DPO trainer with both training and validation datasets.""" unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}" diff --git a/sagemaker-train/tests/integ/train/test_llm_as_judge_base_model_fix.py b/sagemaker-train/tests/integ/train/test_llm_as_judge_base_model_fix.py index 1883c99be3..1da31f71c6 100644 --- a/sagemaker-train/tests/integ/train/test_llm_as_judge_base_model_fix.py +++ b/sagemaker-train/tests/integ/train/test_llm_as_judge_base_model_fix.py @@ -28,8 +28,6 @@ EvaluationPipelineExecution, ) -pytestmark = pytest.mark.gpu_intensive - # Configure logging logging.basicConfig( level=logging.INFO, diff --git a/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py b/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py index 84318b93ea..02b528bfa3 100644 --- a/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py +++ b/sagemaker-train/tests/integ/train/test_llm_as_judge_evaluator.py @@ -22,8 +22,6 @@ EvaluationPipelineExecution, ) -pytestmark = pytest.mark.gpu_intensive - # Configure logging logging.basicConfig( level=logging.INFO, diff --git a/sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py b/sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py index 3c52d5f8fe..a84869b987 100644 --- a/sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py +++ b/sagemaker-train/tests/integ/train/test_rlaif_trainer_integration.py @@ -21,9 +21,7 @@ from sagemaker.train.common import TrainingType import pytest -pytestmark = pytest.mark.gpu_intensive - - +@pytest.mark.gpu_intensive def test_rlaif_trainer_lora_complete_workflow(sagemaker_session): """Test complete RLAIF training workflow with LORA.""" unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}" @@ -65,6 +63,7 @@ def test_rlaif_trainer_lora_complete_workflow(sagemaker_session): assert training_job.output_model_package_arn is not None +@pytest.mark.gpu_intensive def test_rlaif_trainer_with_custom_reward_settings(sagemaker_session): """Test RLAIF trainer with different reward model and prompt.""" unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}" @@ -105,6 +104,7 @@ def test_rlaif_trainer_with_custom_reward_settings(sagemaker_session): assert training_job.output_model_package_arn is not None +@pytest.mark.gpu_intensive def test_rlaif_trainer_continued_finetuning(sagemaker_session): """Test complete RLAIF training workflow with LORA.""" unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}" diff --git a/sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py b/sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py index 5d5883c307..e8320c29ac 100644 --- a/sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py +++ b/sagemaker-train/tests/integ/train/test_rlvr_trainer_integration.py @@ -21,9 +21,7 @@ from sagemaker.train.rlvr_trainer import RLVRTrainer from sagemaker.train.common import TrainingType -pytestmark = pytest.mark.gpu_intensive - - +@pytest.mark.gpu_intensive def test_rlvr_trainer_lora_complete_workflow(sagemaker_session): """Test complete RLVR training workflow with LORA.""" unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}" @@ -63,6 +61,7 @@ def test_rlvr_trainer_lora_complete_workflow(sagemaker_session): assert training_job.output_model_package_arn is not None +@pytest.mark.gpu_intensive def test_rlvr_trainer_with_custom_reward_function(sagemaker_session): """Test RLVR trainer with custom reward function.""" unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}" @@ -102,6 +101,7 @@ def test_rlvr_trainer_with_custom_reward_function(sagemaker_session): assert training_job.output_model_package_arn is not None +@pytest.mark.gpu_intensive @pytest.mark.skip(reason="TODO: Nova test to be enabled in us-east-1") def test_rlvr_trainer_nova_workflow(sagemaker_session_us_east_1): """Test RLVR training workflow with Nova model.""" diff --git a/sagemaker-train/tests/integ/train/test_sft_trainer_integration.py b/sagemaker-train/tests/integ/train/test_sft_trainer_integration.py index 4c2688b8f3..93be84a738 100644 --- a/sagemaker-train/tests/integ/train/test_sft_trainer_integration.py +++ b/sagemaker-train/tests/integ/train/test_sft_trainer_integration.py @@ -21,9 +21,7 @@ from sagemaker.train.sft_trainer import SFTTrainer from sagemaker.train.common import TrainingType -pytestmark = pytest.mark.gpu_intensive - - +@pytest.mark.gpu_intensive def test_sft_trainer_lora_complete_workflow(sagemaker_session): """Test complete SFT training workflow with LORA.""" unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}" @@ -61,6 +59,7 @@ def test_sft_trainer_lora_complete_workflow(sagemaker_session): assert training_job.output_model_package_arn is not None +@pytest.mark.gpu_intensive def test_sft_trainer_with_validation_dataset(sagemaker_session): """Test SFT trainer with both training and validation datasets.""" unique_id = f"{int(time.time())}-{random.randint(1000, 9999)}" @@ -96,6 +95,7 @@ def test_sft_trainer_with_validation_dataset(sagemaker_session): assert hasattr(training_job, 'output_model_package_arn') +@pytest.mark.gpu_intensive @pytest.mark.skip(reason="TODO: Nova test to be enabled in us-east-1") def test_sft_trainer_nova_workflow(sagemaker_session_us_east_1): """Test SFT trainer with Nova model.""" diff --git a/sagemaker-train/tests/integ/train/test_tuner_distributed.py b/sagemaker-train/tests/integ/train/test_tuner_distributed.py index 876116a614..2af2b7cb4d 100644 --- a/sagemaker-train/tests/integ/train/test_tuner_distributed.py +++ b/sagemaker-train/tests/integ/train/test_tuner_distributed.py @@ -29,7 +29,6 @@ from sagemaker.train.distributed import Torchrun from sagemaker.train.tuner import HyperparameterTuner -pytestmark = pytest.mark.gpu_intensive from sagemaker.core.parameter import ContinuousParameter logger = logging.getLogger(__name__)