From 7ef12c25b8c83ff102fac9b2606e7386dbd57a11 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 29 Jan 2026 11:02:54 +0530 Subject: [PATCH 1/6] Windows test --- .github/workflows/test.yml | 10 ++++++- docker-compose.yml | 53 ++++++++++++++++++++++++++++++++++++++ docker/update.sh | 31 ++++++++++++++++++++++ pytest.ini | 4 +++ tests/conftest.py | 42 ++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 docker-compose.yml create mode 100644 docker/update.sh create mode 100644 pytest.ini diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d65cc3796..c52486d0a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -74,7 +74,15 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install test dependencies and scikit-learn + - name: Checkout server-api and patch Docker path + if: runner.os == 'Linux' + shell: bash + run: | + git clone --depth 1 https://github.com/openml/server-api.git server-api + sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml + + - name: Install test dependencies, scikit-learn, and optional pandas + shell: bash run: | python -m pip install --upgrade pip pip install -e .[test] scikit-learn==${{ matrix.scikit-learn }} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..20fcef863 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,53 @@ +services: + database: + image: "openml/test-database:20240105" + container_name: "openml-test-db-ci" + environment: + MYSQL_ROOT_PASSWORD: ok + ports: + - "33060:3306" + healthcheck: + test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] + start_period: 30s + interval: 5s + retries: 10 + + # SETUP WORKER + database-setup: + image: mysql + container_name: "openml-test-setup-ci" + volumes: + # You MUST save the update.sh content you shared earlier to this path + - ./docker/update.sh:/database-update.sh + command: /bin/sh -c "/database-update.sh" + depends_on: + database: + condition: service_healthy + + php-api: + image: "openml/php-rest-api:v1.2.2" + container_name: "openml-php-api-ci" + ports: + - "9002:80" + depends_on: + database: + condition: service_started + environment: + - DB_HOST_OPENML=database:3306 + - DB_HOST_EXPDB=database:3306 + - BASE_URL=http://localhost:9002/ + - INDEX_ES_DURING_STARTUP=false + + # V2 API (PYTHON) + python-api: + container_name: "openml-python-api-ci" + build: + # TODO: replace with image when available + context: ../server-api + dockerfile: docker/python/Dockerfile + ports: + - "9001:8000" + depends_on: + - database + environment: + - DATABASE_URL=mysql://root:ok@database:3306/openml \ No newline at end of file diff --git a/docker/update.sh b/docker/update.sh new file mode 100644 index 000000000..7e9864742 --- /dev/null +++ b/docker/update.sh @@ -0,0 +1,31 @@ +#/bin/bash +# Change the filepath of openml.file +# from "https://www.openml.org/data/download/1666876/phpFsFYVN" +# to "http://minio:9000/datasets/0000/0001/phpFsFYVN" +mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";' + +# Update openml.expdb.dataset with the same url +mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;' + + + + + +# Create the data_feature_description TABLE. TODO: can we make sure this table exists already? +mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` ( + `did` int unsigned NOT NULL, + `index` int unsigned NOT NULL, + `uploader` mediumint unsigned NOT NULL, + `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, + `description_type` enum("plain", "ontology") NOT NULL, + `value` varchar(256) NOT NULL, + KEY `did` (`did`,`index`), + CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE +)' + +# SET dataset 1 to active (used in unittests java) +mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)' +mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";' + +# Temporary fix in case the database missed the kaggle table. The PHP Rest API expects the table to be there, while indexing. +mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)' \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..69fbd903f --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +env = + OPENML_SERVER = http://localhost:9001/api/v2 + OPENML_API_KEY = AD000000000000000000000000000000 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index bd974f3f3..890978558 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,6 +24,7 @@ from __future__ import annotations import multiprocessing +import sys multiprocessing.set_start_method("spawn", force=True) @@ -35,6 +36,9 @@ import pytest import openml_sklearn +import time +import subprocess +import requests import openml from openml.testing import TestBase @@ -296,6 +300,44 @@ def with_test_cache(test_files_directory, request): if tmp_cache.exists(): shutil.rmtree(tmp_cache) +# This starts the entire stack once for the whole test run +@pytest.fixture(scope="session", autouse=True) +def openml_docker_stack(): + # if sys.platform == "win32": + # yield + # return + # 1. Start the containers defined in your final docker-compose.yml + subprocess.run(["docker", "compose", "up", "-d"], check=True) + + # 2. Wait for the database setup worker to finish its tasks + # This ensures update.sh has finished before we hit the APIs + subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True) + + # 3. Quick health check: Wait for the Python API to respond on port 9001 + timeout = 30 + start = time.time() + while time.time() - start < timeout: + try: + if requests.get("http://localhost:9001/api/v2/").status_code == 200: + break + except requests.exceptions.ConnectionError: + time.sleep(1) + + yield # Tests run here + + # 4. Tear everything down after tests finish to keep the machine clean + subprocess.run(["docker", "compose", "down", "-v"], check=True) + +# This resets the database state before every single test to prevent race conditions +@pytest.fixture(scope="function", autouse=True) +def reset_db_state(): + # if sys.platform == "win32": + # yield + # return + # Fast restart of the database container to return to the 'baked-in' state + subprocess.run(["docker", "compose", "restart", "database"], check=True) + # Re-run the setup worker to ensure paths are still correct + subprocess.run(["docker", "compose", "up", "database-setup"], check=True) @pytest.fixture def static_cache_dir(): From 7c14c684d35eb409562b590fd225a315f7108ce0 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Thu, 29 Jan 2026 16:35:22 +0530 Subject: [PATCH 2/6] bug fixing --- .github/workflows/test.yml | 2 +- pyproject.toml | 15 --------------- pytest.ini | 14 ++++++++++++++ tests/conftest.py | 16 ++++------------ tests/test_1.py | 14 ++++++++++++++ 5 files changed, 33 insertions(+), 28 deletions(-) create mode 100644 tests/test_1.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c52486d0a..c2b05a6be 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -75,7 +75,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Checkout server-api and patch Docker path - if: runner.os == 'Linux' + # if: matrix.os == 'Linux' shell: bash run: | git clone --depth 1 https://github.com/openml/server-api.git server-api diff --git a/pyproject.toml b/pyproject.toml index 93a6ffbfa..0627d0901 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,21 +124,6 @@ openml = ["*.txt", "*.md", "py.typed"] [tool.setuptools.dynamic] version = {attr = "openml.__version__.__version__"} -# https://docs.pytest.org/en/7.2.x/reference/reference.html#ini-options-ref -[tool.pytest.ini_options] -testpaths = ["tests"] -minversion = "7.0" -xfail_strict = true -filterwarnings=[ - "ignore:the matrix subclass:PendingDeprecationWarning" -] -markers = [ - "server: anything that connects to a server", - "upload: anything that uploads to a server", - "production: any interaction with the production server", - "cache: anything that interacts with the (test) cache", -] - # https://github.com/charliermarsh/ruff [tool.ruff] target-version = "py310" diff --git a/pytest.ini b/pytest.ini index 69fbd903f..12d9fe136 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,18 @@ [pytest] +minversion = 7.0 +testpaths = tests +xfail_strict = true + +filterwarnings = + ignore:the matrix subclass:PendingDeprecationWarning + +markers = + server: anything that connects to a server + upload: anything that uploads to a server + production: any interaction with the production server + cache: anything that interacts with the (test) cache + uses_test_server: tests that use the local docker stack + env = OPENML_SERVER = http://localhost:9001/api/v2 OPENML_API_KEY = AD000000000000000000000000000000 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 890978558..7ea9257f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -306,37 +306,29 @@ def openml_docker_stack(): # if sys.platform == "win32": # yield # return - # 1. Start the containers defined in your final docker-compose.yml subprocess.run(["docker", "compose", "up", "-d"], check=True) - - # 2. Wait for the database setup worker to finish its tasks - # This ensures update.sh has finished before we hit the APIs subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True) - # 3. Quick health check: Wait for the Python API to respond on port 9001 - timeout = 30 + timeout = 10 start = time.time() while time.time() - start < timeout: try: - if requests.get("http://localhost:9001/api/v2/").status_code == 200: + response = requests.get("http://localhost:9001/api/v2/") + if response.status_code in [200, 404, 405]: break except requests.exceptions.ConnectionError: time.sleep(1) - yield # Tests run here + yield - # 4. Tear everything down after tests finish to keep the machine clean subprocess.run(["docker", "compose", "down", "-v"], check=True) -# This resets the database state before every single test to prevent race conditions @pytest.fixture(scope="function", autouse=True) def reset_db_state(): # if sys.platform == "win32": # yield # return - # Fast restart of the database container to return to the 'baked-in' state subprocess.run(["docker", "compose", "restart", "database"], check=True) - # Re-run the setup worker to ensure paths are still correct subprocess.run(["docker", "compose", "up", "database-setup"], check=True) @pytest.fixture diff --git a/tests/test_1.py b/tests/test_1.py new file mode 100644 index 000000000..169ebbd03 --- /dev/null +++ b/tests/test_1.py @@ -0,0 +1,14 @@ +import pytest +import requests + +# Requesting the 'openml_docker_stack' fixture forces it to run! +def test_can_connect_to_local_docker(openml_docker_stack): + print("\n🐳 Docker Stack is UP! Checking connection...") + + # Try to talk to the V2 API we just built + response = requests.get("http://localhost:9001/api/v2") + + # If we get a 200 OK or 404 (Not Found), the server is alive. + # If it fails, this line will crash the test. + assert response.status_code in [200, 404] + print("āœ… Successfully connected to Local V2 API on port 9001") \ No newline at end of file From 16ceeaab9f2cb65eb9a9025704c4e31204a6fb57 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:06:38 +0530 Subject: [PATCH 3/6] remove db refresh every test --- .github/workflows/test.yml | 1 - tests/conftest.py | 8 -------- tests/test_1.py | 6 ++---- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 65ebcbe4a..228500278 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -83,7 +83,6 @@ jobs: python-version: ${{ matrix.python-version }} - name: Checkout server-api and patch Docker path - # if: matrix.os == 'Linux' shell: bash run: | git clone --depth 1 https://github.com/openml/server-api.git server-api diff --git a/tests/conftest.py b/tests/conftest.py index 7ea9257f6..e9bb08013 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -323,14 +323,6 @@ def openml_docker_stack(): subprocess.run(["docker", "compose", "down", "-v"], check=True) -@pytest.fixture(scope="function", autouse=True) -def reset_db_state(): - # if sys.platform == "win32": - # yield - # return - subprocess.run(["docker", "compose", "restart", "database"], check=True) - subprocess.run(["docker", "compose", "up", "database-setup"], check=True) - @pytest.fixture def static_cache_dir(): return Path(__file__).parent / "files" diff --git a/tests/test_1.py b/tests/test_1.py index 169ebbd03..318fa83c1 100644 --- a/tests/test_1.py +++ b/tests/test_1.py @@ -3,12 +3,10 @@ # Requesting the 'openml_docker_stack' fixture forces it to run! def test_can_connect_to_local_docker(openml_docker_stack): - print("\n🐳 Docker Stack is UP! Checking connection...") # Try to talk to the V2 API we just built - response = requests.get("http://localhost:9001/api/v2") + response = requests.get("http://localhost:9001/docs") # If we get a 200 OK or 404 (Not Found), the server is alive. # If it fails, this line will crash the test. - assert response.status_code in [200, 404] - print("āœ… Successfully connected to Local V2 API on port 9001") \ No newline at end of file + assert response.status_code in [200] From 015acf46330c5604824b30d9c28a0538a54dd120 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:18:32 +0530 Subject: [PATCH 4/6] bug fixing --- .github/workflows/test.yml | 8 ++++---- pyproject.toml | 19 +++++++++++++++++++ pytest.ini | 18 ------------------ tests/conftest.py | 9 ++------- 4 files changed, 25 insertions(+), 29 deletions(-) delete mode 100644 pytest.ini diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 228500278..686440234 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -83,10 +83,10 @@ jobs: python-version: ${{ matrix.python-version }} - name: Checkout server-api and patch Docker path - shell: bash - run: | - git clone --depth 1 https://github.com/openml/server-api.git server-api - sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml + shell: bash + run: | + git clone --depth 1 https://github.com/openml/server-api.git server-api + sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml - name: Install test dependencies, scikit-learn, and optional pandas shell: bash diff --git a/pyproject.toml b/pyproject.toml index 0627d0901..6165f9497 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -124,6 +124,25 @@ openml = ["*.txt", "*.md", "py.typed"] [tool.setuptools.dynamic] version = {attr = "openml.__version__.__version__"} +# https://docs.pytest.org/en/7.2.x/reference/reference.html#ini-options-ref +[tool.pytest.ini_options] +testpaths = ["tests"] +minversion = "7.0" +xfail_strict = true +filterwarnings=[ + "ignore:the matrix subclass:PendingDeprecationWarning" +] +markers = [ + "server: anything that connects to a server", + "upload: anything that uploads to a server", + "production: any interaction with the production server", + "cache: anything that interacts with the (test) cache", +] +env = [ + "OPENML_SERVER=http://localhost:9001/api/v2", + "OPENML_API_KEY=AD000000000000000000000000000000", +] + # https://github.com/charliermarsh/ruff [tool.ruff] target-version = "py310" diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 12d9fe136..000000000 --- a/pytest.ini +++ /dev/null @@ -1,18 +0,0 @@ -[pytest] -minversion = 7.0 -testpaths = tests -xfail_strict = true - -filterwarnings = - ignore:the matrix subclass:PendingDeprecationWarning - -markers = - server: anything that connects to a server - upload: anything that uploads to a server - production: any interaction with the production server - cache: anything that interacts with the (test) cache - uses_test_server: tests that use the local docker stack - -env = - OPENML_SERVER = http://localhost:9001/api/v2 - OPENML_API_KEY = AD000000000000000000000000000000 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index e9bb08013..a2c29a6ad 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -300,12 +300,8 @@ def with_test_cache(test_files_directory, request): if tmp_cache.exists(): shutil.rmtree(tmp_cache) -# This starts the entire stack once for the whole test run @pytest.fixture(scope="session", autouse=True) def openml_docker_stack(): - # if sys.platform == "win32": - # yield - # return subprocess.run(["docker", "compose", "up", "-d"], check=True) subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True) @@ -313,9 +309,8 @@ def openml_docker_stack(): start = time.time() while time.time() - start < timeout: try: - response = requests.get("http://localhost:9001/api/v2/") - if response.status_code in [200, 404, 405]: - break + requests.get("http://localhost:9001/api/v2/") + break except requests.exceptions.ConnectionError: time.sleep(1) From 937fc770adf8a618851e7cc602b2a87e23f504fe Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:50:32 +0530 Subject: [PATCH 5/6] bug fixing --- .github/workflows/test.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 686440234..107494bf0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -82,12 +82,6 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Checkout server-api and patch Docker path - shell: bash - run: | - git clone --depth 1 https://github.com/openml/server-api.git server-api - sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml - - name: Install test dependencies, scikit-learn, and optional pandas shell: bash run: | @@ -107,6 +101,12 @@ jobs: echo "BEFORE=$git_status" >> $GITHUB_ENV echo "Repository status before tests: $git_status" + - name: Checkout server-api and patch Docker path + shell: bash + run: | + git clone --depth 1 https://github.com/openml/server-api.git server-api + sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml + - name: Show installed dependencies run: python -m pip list @@ -145,6 +145,13 @@ jobs: run: | # we need a separate step because of the bash-specific if-statement in the previous one. pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" + - name: Cleanup Docker setup + if: always() + shell: bash + run: | + rm -rf server-api + git checkout docker-compose.yml + - name: Check for files left behind by test if: matrix.os != 'windows-latest' && always() run: | From 30972f8d7c7249f64fc605a17ca006351a1d6149 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Fri, 30 Jan 2026 02:53:36 +0530 Subject: [PATCH 6/6] bug fixing --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 107494bf0..f3d16aeeb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -146,10 +146,10 @@ jobs: pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server" - name: Cleanup Docker setup - if: always() - shell: bash - run: | - rm -rf server-api + if: always() + shell: bash + run: | + rm -rf server-api git checkout docker-compose.yml - name: Check for files left behind by test