diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b10721f55..f3d16aeeb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -101,6 +101,13 @@ jobs:
         echo "BEFORE=$git_status" >> $GITHUB_ENV
         echo "Repository status before tests: $git_status"
 
+    # docker-compose.yml builds python-api from ../server-api; clone it into the workspace and repoint the build context.
+    - name: Checkout server-api and patch Docker path
+      shell: bash
+      run: |
+        git clone --depth 1 https://github.com/openml/server-api.git server-api
+        sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml
+
     - name: Show installed dependencies
       run: python -m pip list
 
@@ -139,6 +146,14 @@ jobs:
       run: | # we need a separate step because of the bash-specific if-statement in the previous one.
         pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"
 
+    # Remove the server-api clone and restore the patched compose file, even when the tests failed.
+    - name: Cleanup Docker setup
+      if: always()
+      shell: bash
+      run: |
+        rm -rf server-api
+        git checkout docker-compose.yml
+
     - name: Check for files left behind by test
       if: matrix.os != 'windows-latest' && always()
       run: |
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..20fcef863
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,53 @@
+services:
+  database:
+    image: "openml/test-database:20240105"
+    container_name: "openml-test-db-ci"
+    environment:
+      MYSQL_ROOT_PASSWORD: ok
+    ports:
+      - "33060:3306"
+    healthcheck:
+      test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
+      start_period: 30s
+      interval: 5s
+      retries: 10
+
+  # SETUP WORKER
+  database-setup:
+    image: mysql
+    container_name: "openml-test-setup-ci"
+    volumes:
+      # Mount the database fix-up script; it is executed as this container's command
+      - ./docker/update.sh:/database-update.sh
+    command: /bin/sh -c "/database-update.sh"
+    depends_on:
+      database:
+        condition: service_healthy
+
+  php-api:
+    image: "openml/php-rest-api:v1.2.2"
+    container_name: "openml-php-api-ci"
+    ports:
+      - "9002:80"
+    depends_on:
+      database:
+        condition: service_started
+    environment:
+      - DB_HOST_OPENML=database:3306
+      - DB_HOST_EXPDB=database:3306
+      - BASE_URL=http://localhost:9002/
+      - INDEX_ES_DURING_STARTUP=false
+
+  # V2 API (PYTHON)
+  python-api:
+    container_name: "openml-python-api-ci"
+    build:
+      # TODO: replace with image when available
+      context: ../server-api
+      dockerfile: docker/python/Dockerfile
+    ports:
+      - "9001:8000"
+    depends_on:
+      - database
+    environment:
+      - DATABASE_URL=mysql://root:ok@database:3306/openml
diff --git a/docker/update.sh b/docker/update.sh
new file mode 100644
index 000000000..7e9864742
--- /dev/null
+++ b/docker/update.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Change the filepath of openml.file
+# from "https://www.openml.org/data/download/1666876/phpFsFYVN"
+# to "http://minio:9000/datasets/0000/0001/phpFsFYVN"
+mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";'
+
+# Update openml_expdb.dataset with the same url
+mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;'
+
+
+
+
+
+# Create the data_feature_description TABLE. TODO: can we make sure this table exists already?
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` (
+  `did` int unsigned NOT NULL,
+  `index` int unsigned NOT NULL,
+  `uploader` mediumint unsigned NOT NULL,
+  `date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  `description_type` enum("plain", "ontology") NOT NULL,
+  `value` varchar(256) NOT NULL,
+  KEY `did` (`did`,`index`),
+  CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE
+)'
+
+# SET dataset 1 to active (used in unittests java)
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)'
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";'
+
+# Temporary fix in case the database missed the kaggle table. The PHP Rest API expects the table to be there, while indexing.
+mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)'
diff --git a/pyproject.toml b/pyproject.toml
index 93a6ffbfa..6165f9497 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -138,6 +138,10 @@ markers = [
     "production: any interaction with the production server",
     "cache: anything that interacts with the (test) cache",
 ]
+env = [
+    "OPENML_SERVER=http://localhost:9001/api/v2",
+    "OPENML_API_KEY=AD000000000000000000000000000000",
+]
 
 # https://github.com/charliermarsh/ruff
 [tool.ruff]
diff --git a/tests/conftest.py b/tests/conftest.py
index bd974f3f3..a2c29a6ad 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -24,6 +24,7 @@
 from __future__ import annotations
 
 import multiprocessing
+import sys
 
 multiprocessing.set_start_method("spawn", force=True)
 
@@ -35,6 +36,9 @@
 import pytest
 import openml_sklearn
 
+import time
+import subprocess
+import requests
 import openml
 from openml.testing import TestBase
 
@@ -296,6 +300,45 @@ def with_test_cache(test_files_directory, request):
     if tmp_cache.exists():
         shutil.rmtree(tmp_cache)
 
+@pytest.fixture(scope="session", autouse=True)
+def openml_docker_stack():
+    """Run the docker-compose test stack for the duration of the test session.
+
+    Starts the services, waits for the ``openml-test-setup-ci`` container to
+    exit and then polls the V2 API until it accepts connections.
+
+    Raises
+    ------
+    subprocess.CalledProcessError
+        If a ``docker`` command exits with a non-zero status.
+    RuntimeError
+        If the V2 API cannot be reached before the timeout elapses.
+    """
+    api_url = "http://localhost:9001/api/v2/"
+    timeout = 10
+    try:
+        subprocess.run(["docker", "compose", "up", "-d"], check=True)
+        subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True)
+
+        # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
+        deadline = time.monotonic() + timeout
+        while True:
+            try:
+                # Bounded request so a hung connection cannot stall the session.
+                requests.get(api_url, timeout=2)
+                break
+            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+                if time.monotonic() >= deadline:
+                    # Fail loudly instead of running the suite against a dead server.
+                    raise RuntimeError(
+                        f"V2 API at {api_url} not reachable within {timeout} seconds"
+                    ) from None
+                time.sleep(1)
+
+        yield
+    finally:
+        # Tear down even when start-up failed part-way, so no containers leak.
+        subprocess.run(["docker", "compose", "down", "-v"], check=True)
 
 @pytest.fixture
 def static_cache_dir():
diff --git a/tests/test_1.py b/tests/test_1.py
new file mode 100644
index 000000000..318fa83c1
--- /dev/null
+++ b/tests/test_1.py
@@ -0,0 +1,12 @@
+import pytest
+import requests
+
+# The session-scoped 'openml_docker_stack' fixture brings the stack up before this runs.
+def test_can_connect_to_local_docker(openml_docker_stack):
+    """Smoke test: the V2 API on port 9001 answers on its /docs endpoint."""
+    # Talk to the V2 API started by the docker-compose stack
+    response = requests.get("http://localhost:9001/docs", timeout=10)
+
+    # Only 200 OK counts as success; any other status fails the assertion.
+    # A connection failure raises inside requests.get and fails the test as well.
+    assert response.status_code == 200