diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 55a4a354a..31cdff602 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -96,14 +96,22 @@ jobs:
         echo "Repository status before tests: $git_status"
     - name: Show installed dependencies
       run: python -m pip list
-    - name: Run tests on Ubuntu
+    - name: Run tests on Ubuntu Test
      if: matrix.os == 'ubuntu-latest'
      run: |
        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
        # Most of the time, running only the scikit-learn tests is sufficient
-       if [ ${{ matrix.sklearn-only }} = 'true' ]; then sklearn='-m sklearn'; fi
-       echo pytest -n 4 --durations=20 --dist load -sv $codecov $sklearn -o log_cli=true
-       pytest -n 4 --durations=20 --dist load -sv $codecov $sklearn -o log_cli=true
+       if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and not production'; else marks='not production'; fi
+       echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+       pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+    - name: Run tests on Ubuntu Production
+      if: matrix.os == 'ubuntu-latest'
+      run: |
+       if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
+       # Most of the time, running only the scikit-learn tests is sufficient
+       if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and production'; else marks='production'; fi
+       echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+       pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
     - name: Run tests on Windows
      if: matrix.os == 'windows-latest'
      run: | # we need a separate step because of the bash-specific if-statement in the previous one.
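
Note on the marker expressions above: pytest's -m flag filters tests by marker expression, so the split into "Ubuntu Test" and "Ubuntu Production" steps only takes effect for tests that actually carry these marks. A minimal sketch of how a test opts in (test names here are hypothetical, purely for illustration; the real marks are applied throughout the repository's test suite):

    import pytest

    @pytest.mark.production()
    def test_against_production_server():
        # picked up by -m "production" (or "sklearn and production" when it
        # also carries the sklearn mark); excluded by -m "not production"
        ...

    @pytest.mark.sklearn()
    def test_sklearn_extension_flow():
        # picked up by -m "sklearn and not production" in the Test step
        ...
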
diff --git a/openml/testing.py b/openml/testing.py
index a3a5806e8..f026c6137 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -101,7 +101,6 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
         self.cached = True
         openml.config.apikey = TestBase.apikey
         self.production_server = "https://www.openml.org/api/v1/xml"
-        openml.config.server = TestBase.test_server
         openml.config.avoid_duplicate_runs = False
         openml.config.set_root_cache_directory(str(self.workdir))
 
@@ -120,7 +119,6 @@ def tearDown(self) -> None:
                 # one of the files may still be used by another process
                 raise e
 
-        openml.config.server = self.production_server
         openml.config.connection_n_retries = self.connection_n_retries
         openml.config.retry_policy = self.retry_policy
diff --git a/tests/conftest.py b/tests/conftest.py
index b523117c1..968a5a58a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -268,11 +268,15 @@ def as_robot() -> Iterator[None]:
     openml.config.set_retry_policy(policy, n_retries)
 
 
-@pytest.fixture(autouse=True, scope="session")
-def with_test_server():
-    openml.config.start_using_configuration_for_example()
+@pytest.fixture(autouse=True)
+def with_server(request):
+    if "production" in request.keywords:
+        openml.config.server = "https://www.openml.org/api/v1/xml"
+        yield
+        return
+    openml.config.server = "https://test.openml.org/api/v1/xml"
+    openml.config.apikey = "c0c42819af31e706efe1f4b88c23c6c1"
     yield
-    openml.config.stop_using_configuration_for_example()
 
 
 @pytest.fixture(autouse=True)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index d6b26d864..ad8bbc693 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -1951,7 +1951,8 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
     content_file = (
         test_files_directory / "mock_responses" / "datasets" / "data_description_61.xml"
     )
-    requests_mock.get("https://www.openml.org/api/v1/xml/data/61", text=content_file.read_text())
+    # While the mocked example is from production, unit tests by default connect to the test server.
+    requests_mock.get("https://test.openml.org/api/v1/xml/data/61", text=content_file.read_text())
     dataset = openml.datasets.get_dataset(61, download_data=True)
     assert dataset._parquet_url is not None
     assert dataset.parquet_file is not None
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 7235075c0..5023943d7 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -68,7 +68,7 @@ class TestRun(TestBase):
         "task_meta_data": {
             "task_type": TaskType.SUPERVISED_CLASSIFICATION,
             "dataset_id": 16,  # credit-a
-            "estimation_procedure_id": 1,
+            "estimation_procedure_id": 6,
             "target_name": "class",
         },
     }
@@ -81,7 +81,7 @@ class TestRun(TestBase):
         "task_meta_data": {
             "task_type": TaskType.SUPERVISED_CLASSIFICATION,
             "dataset_id": 20,  # diabetes
-            "estimation_procedure_id": 1,
+            "estimation_procedure_id": 5,
             "target_name": "class",
         },
     }
diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py
index d3553262f..d4f2ed9d7 100644
--- a/tests/test_tasks/test_classification_task.py
+++ b/tests/test_tasks/test_classification_task.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 import pandas as pd
+import pytest
 
 from openml.tasks import TaskType, get_task
 
@@ -17,14 +18,6 @@ def setUp(self, n_levels: int = 1):
         self.task_type = TaskType.SUPERVISED_CLASSIFICATION
         self.estimation_procedure = 5
 
-    def test_get_X_and_Y(self):
-        X, Y = super().test_get_X_and_Y()
-        assert X.shape == (768, 8)
-        assert isinstance(X, pd.DataFrame)
-        assert Y.shape == (768,)
-        assert isinstance(Y, pd.Series)
-        assert pd.api.types.is_categorical_dtype(Y)
-
     def test_download_task(self):
         task = super().test_download_task()
         assert task.task_id == self.task_id
@@ -34,4 +27,15 @@ def test_class_labels(self):
 
     def test_class_labels(self):
         task = get_task(self.task_id)
-        assert task.class_labels == ["tested_negative", "tested_positive"]
\ No newline at end of file
+        assert task.class_labels == ["tested_negative", "tested_positive"]
+
+
+@pytest.mark.server()
+def test_get_X_and_Y():
+    task = get_task(119)
+    X, Y = task.get_X_and_y()
+    assert X.shape == (768, 8)
+    assert isinstance(X, pd.DataFrame)
+    assert Y.shape == (768,)
+    assert isinstance(Y, pd.Series)
+    assert pd.api.types.is_categorical_dtype(Y)
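
For context on how the "production" mark interacts with the new autouse with_server fixture in tests/conftest.py: every test is rerouted to test.openml.org unless it carries the production mark, in which case the fixture leaves the client on the production endpoint. A hypothetical illustration of the resulting behavior (test names invented, not part of this diff):

    import pytest
    import openml

    @pytest.mark.production()
    def test_routed_to_production():
        # with_server saw "production" in request.keywords and set the
        # production endpoint before this test body runs
        assert openml.config.server == "https://www.openml.org/api/v1/xml"

    def test_routed_to_test_server():
        # unmarked tests are pointed at the test server with the shared API key
        assert openml.config.server == "https://test.openml.org/api/v1/xml"
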