Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/workflows/test.yml
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there should be following steps to setup the docker services, though these should be ignored for window machines

  1. clone the repo, which you are already doing
  2. run the docker services
  3. verify that localhost endpoints are live

Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ jobs:
echo "BEFORE=$git_status" >> $GITHUB_ENV
echo "Repository status before tests: $git_status"

- name: Checkout server-api and patch Docker path
shell: bash
run: |
git clone --depth 1 https://github.com/openml/server-api.git server-api
sed -i 's|\.\./server-api|./server-api|g' docker-compose.yml

- name: Show installed dependencies
run: python -m pip list

Expand Down Expand Up @@ -139,6 +145,13 @@ jobs:
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"

- name: Cleanup Docker setup
if: always()
shell: bash
run: |
rm -rf server-api
git checkout docker-compose.yml

- name: Check for files left behind by test
if: matrix.os != 'windows-latest' && always()
run: |
Expand Down
52 changes: 52 additions & 0 deletions docker-compose.yml
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be removed

Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
services:
database:
image: "openml/test-database:20240105"
container_name: "openml-test-db-ci"
environment:
MYSQL_ROOT_PASSWORD: ok
ports:
- "33060:3306"
healthcheck:
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
start_period: 30s
interval: 5s
retries: 10

database-setup:
image: mysql
container_name: "openml-test-setup-ci"
volumes:
- ./docker/update.sh:/database-update.sh
command: /bin/sh -c "/database-update.sh"
depends_on:
database:
condition: service_healthy

# V1 API (PHP)
php-api:
image: "openml/php-rest-api:v1.2.2"
container_name: "openml-php-api-ci"
ports:
- "9002:80"
depends_on:
database:
condition: service_started
environment:
- DB_HOST_OPENML=database:3306
- DB_HOST_EXPDB=database:3306
- BASE_URL=http://localhost:9002/
- INDEX_ES_DURING_STARTUP=false

# V2 API (PYTHON)
python-api:
container_name: "openml-python-api-ci"
build:
# TODO: replace with image when available
context: ../server-api
dockerfile: docker/python/Dockerfile
ports:
- "9001:8000"
depends_on:
- database
environment:
- DATABASE_URL=mysql://root:ok@database:3306/openml
31 changes: 31 additions & 0 deletions docker/update.sh
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be removed

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#/bin/bash
# Change the filepath of openml.file
# from "https://www.openml.org/data/download/1666876/phpFsFYVN"
# to "http://minio:9000/datasets/0000/0001/phpFsFYVN"
mysql -hdatabase -uroot -pok -e 'UPDATE openml.file SET filepath = CONCAT("http://minio:9000/datasets/0000/", LPAD(id, 4, "0"), "/", SUBSTRING_INDEX(filepath, "/", -1)) WHERE extension="arff";'

# Update openml.expdb.dataset with the same url
mysql -hdatabase -uroot -pok -e 'UPDATE openml_expdb.dataset DS, openml.file FL SET DS.url = FL.filepath WHERE DS.did = FL.id;'





# Create the data_feature_description TABLE. TODO: can we make sure this table exists already?
mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `data_feature_description` (
`did` int unsigned NOT NULL,
`index` int unsigned NOT NULL,
`uploader` mediumint unsigned NOT NULL,
`date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`description_type` enum("plain", "ontology") NOT NULL,
`value` varchar(256) NOT NULL,
KEY `did` (`did`,`index`),
CONSTRAINT `data_feature_description_ibfk_1` FOREIGN KEY (`did`, `index`) REFERENCES `data_feature` (`did`, `index`) ON DELETE CASCADE ON UPDATE CASCADE
)'

# SET dataset 1 to active (used in unittests java)
mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'INSERT IGNORE INTO dataset_status VALUES (1, "active", "2024-01-01 00:00:00", 1)'
mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'DELETE FROM dataset_status WHERE did = 2 AND status = "deactivated";'

# Temporary fix in case the database missed the kaggle table. The PHP Rest API expects the table to be there, while indexing.
mysql -hdatabase -uroot -pok -Dopenml_expdb -e 'CREATE TABLE IF NOT EXISTS `kaggle` (`dataset_id` int(11) DEFAULT NULL, `kaggle_link` varchar(500) DEFAULT NULL)'
46 changes: 46 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from __future__ import annotations

import multiprocessing
import sys

import fasteners

multiprocessing.set_start_method("spawn", force=True)

Expand All @@ -35,6 +38,9 @@
import pytest
import openml_sklearn

import time
import subprocess
import requests
import openml
from openml.testing import TestBase

Expand Down Expand Up @@ -296,6 +302,46 @@ def with_test_cache(test_files_directory, request):
if tmp_cache.exists():
shutil.rmtree(tmp_cache)

def _is_server_responding():
"""Check if the Docker API is already listening."""
try:
requests.get("http://localhost:9001/api/v2/", timeout=1)
return True
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
return False

def _start_docker():
"""Logic to spin up the containers and wait for initialization."""
subprocess.run(["docker", "compose", "up", "-d"], check=True, capture_output=True, text=True)
subprocess.run(["docker", "wait", "openml-test-setup-ci"], check=True)

@pytest.fixture(scope="session", autouse=True)
def openml_docker_stack(tmp_path_factory, worker_id):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please undo these changes. this is a really good implementation but we don't want to manually setup the docker services when pytests are run, that could be problematic for developers setting up pytests (it also clones the other repo inside this which is really bad), though we'll give them an option to setup their docker services themselves and toggle the pytest behaviour to use localhosts via an env variable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it also clones the other repo inside this which is really bad

I am still reading all the reviews, but this does not happen locally, only in CI.

# Skip Docker setup in CI on Windows given docker images are for Linux
is_ci = os.environ.get("CI") == "true"
is_windows = sys.platform == "win32" or os.name == "nt"

if is_ci and is_windows:
yield
return

# For local development with single worker
if worker_id == "master":
_start_docker()
yield
subprocess.run(["docker", "compose", "down", "-v"], check=True)
return

# For CI with multiple workers (xdist)
root_tmp_dir = tmp_path_factory.getbasetemp().parent
lock_file = root_tmp_dir / "docker_setup.lock"

lock = fasteners.InterProcessLock(str(lock_file))
with lock:
if not _is_server_responding():
_start_docker()

yield

@pytest.fixture
def static_cache_dir():
Expand Down