diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py new file mode 100644 index 000000000..881f40671 --- /dev/null +++ b/openml/_api/__init__.py @@ -0,0 +1,8 @@ +from openml._api.runtime.core import APIContext + + +def set_api_version(version: str, *, strict: bool = False) -> None: + api_context.set_version(version=version, strict=strict) + + +api_context = APIContext() diff --git a/openml/_api/config.py b/openml/_api/config.py new file mode 100644 index 000000000..bd93c3cad --- /dev/null +++ b/openml/_api/config.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +API_V1_SERVER = "https://www.openml.org/api/v1/xml" +API_V2_SERVER = "http://127.0.0.1:8001" +API_KEY = "..." diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py new file mode 100644 index 000000000..8e6d1e4ce --- /dev/null +++ b/openml/_api/http/__init__.py @@ -0,0 +1,3 @@ +from openml._api.http.client import HTTPClient + +__all__ = ["HTTPClient"] diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py new file mode 100644 index 000000000..dea5de809 --- /dev/null +++ b/openml/_api/http/client.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import Any, Mapping + +import requests +from requests import Response + +from openml.__version__ import __version__ + + +class HTTPClient: + def __init__(self, base_url: str) -> None: + self.base_url = base_url + self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + + def get( + self, + path: str, + params: Mapping[str, Any] | None = None, + ) -> Response: + url = f"{self.base_url}/{path}" + return requests.get(url, params=params, headers=self.headers, timeout=10) + + def post( + self, + path: str, + data: Mapping[str, Any] | None = None, + files: Any = None, + ) -> Response: + url = f"{self.base_url}/{path}" + return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) + + def delete( + self, + path: str, + params: Mapping[str, Any] | None = None, 
+ ) -> Response: + url = f"{self.base_url}/{path}" + return requests.delete(url, params=params, headers=self.headers, timeout=10) diff --git a/openml/_api/http/utils.py b/openml/_api/http/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py new file mode 100644 index 000000000..f933f5d8b --- /dev/null +++ b/openml/_api/resources/__init__.py @@ -0,0 +1,5 @@ +from openml._api.resources.datasets import DatasetsV1, DatasetsV2 +from openml._api.resources.evaluations import EvaluationsV1, EvaluationsV2 +from openml._api.resources.tasks import TasksV1, TasksV2 + +__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2", "EvaluationsV1", "EvaluationsV2"] diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py new file mode 100644 index 000000000..83c1c1718 --- /dev/null +++ b/openml/_api/resources/base.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from requests import Response + + from openml._api.http import HTTPClient + from openml.datasets.dataset import OpenMLDataset + from openml.evaluations.evaluation import OpenMLEvaluation + from openml.tasks.task import OpenMLTask + + +class ResourceAPI: + def __init__(self, http: HTTPClient): + self._http = http + + +class DatasetsAPI(ResourceAPI, ABC): + @abstractmethod + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... + + +class TasksAPI(ResourceAPI, ABC): + @abstractmethod + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: ... + + +class EvaluationsAPI(ResourceAPI, ABC): + @abstractmethod + def list(self, api_call: str) -> list[OpenMLEvaluation]: ... 
diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py new file mode 100644 index 000000000..9ff1ec278 --- /dev/null +++ b/openml/_api/resources/datasets.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from openml._api.resources.base import DatasetsAPI + +if TYPE_CHECKING: + from requests import Response + + from openml.datasets.dataset import OpenMLDataset + + +class DatasetsV1(DatasetsAPI): + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: + raise NotImplementedError + + +class DatasetsV2(DatasetsAPI): + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: + raise NotImplementedError diff --git a/openml/_api/resources/evaluations.py b/openml/_api/resources/evaluations.py new file mode 100644 index 000000000..e9ac1a012 --- /dev/null +++ b/openml/_api/resources/evaluations.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +import json + +import xmltodict + +from openml._api.resources.base import EvaluationsAPI +from openml.evaluations.evaluation import OpenMLEvaluation + + +class EvaluationsV1(EvaluationsAPI): + """V1 API implementation for evaluations. + Fetches evaluations from the v1 XML API endpoint. + """ + + def list(self, api_call: str) -> list[OpenMLEvaluation]: + """Fetch and list evaluations from the OpenML API. + + Makes an API call to retrieve evaluation results, parses the XML response, + and converts it into OpenMLEvaluation objects. + + Parameters + ---------- + api_call : str + The API endpoint path (without base URL) to call for evaluations. + Example: "evaluation/list/function/predictive_accuracy/limit/10" + + Returns + ------- + list[OpenMLEvaluation] + A list of OpenMLEvaluation objects containing the parsed evaluations. + + Raises + ------ + ValueError + If the XML response does not contain the expected structure. + AssertionError + If the evaluation data is not in list format as expected. 
+ + Notes + ----- + This method performs two API calls: + 1. Fetches evaluation data from the specified endpoint + 2. Fetches user information for all uploaders in the evaluation data + + The user information is used to map uploader IDs to usernames. + """ + eval_response = self._http.get(api_call) + xml_content = eval_response.text + + evals_dict = xmltodict.parse(xml_content, force_list=("oml:evaluation",)) + # Minimalistic check if the XML is useful + if "oml:evaluations" not in evals_dict: + raise ValueError( + "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}', + ) + + assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type( + evals_dict["oml:evaluations"]["oml:evaluation"], + ) + + uploader_ids = list( + {eval_["oml:uploader"] for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]}, + ) + api_users = "user/list/user_id/" + ",".join(uploader_ids) + user_response = self._http.get(api_users) + xml_content_user = user_response.text + + users = xmltodict.parse(xml_content_user, force_list=("oml:user",)) + user_dict = { + user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"] + } + + evals = [] + for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]: + run_id = int(eval_["oml:run_id"]) + value = float(eval_["oml:value"]) if "oml:value" in eval_ else None + values = json.loads(eval_["oml:values"]) if eval_.get("oml:values", None) else None + array_data = eval_.get("oml:array_data") + + evals.append( + OpenMLEvaluation( + run_id=run_id, + task_id=int(eval_["oml:task_id"]), + setup_id=int(eval_["oml:setup_id"]), + flow_id=int(eval_["oml:flow_id"]), + flow_name=eval_["oml:flow_name"], + data_id=int(eval_["oml:data_id"]), + data_name=eval_["oml:data_name"], + function=eval_["oml:function"], + upload_time=eval_["oml:upload_time"], + uploader=int(eval_["oml:uploader"]), + uploader_name=user_dict[eval_["oml:uploader"]], + value=value, + values=values, + array_data=array_data, + ) + ) + 
+ return evals + + +class EvaluationsV2(EvaluationsAPI): + """V2 API implementation for evaluations. + Fetches evaluations from the v2 json API endpoint. + """ + + def list(self, api_call: str) -> list[OpenMLEvaluation]: + """Fetch and list evaluations from the OpenML API. + + Makes an API call to retrieve evaluation results, parses the json response, + and converts it into OpenMLEvaluation objects. + + Parameters + ---------- + api_call : str + The API endpoint path (without base URL) to call for evaluations. + Example: "evaluation/list/function/predictive_accuracy/limit/10 + + Returns + ------- + list[OpenMLEvaluation] + A list of OpenMLEvaluation objects containing the parsed evaluations. + + Raises + ------ + NotImplementedError + + Notes + ----- + This method performs two API calls: + 1. Fetches evaluation data from the specified endpoint + 2. Fetches user information for all uploaders in the evaluation data + + The user information is used to map uploader IDs to usernames. + """ + raise NotImplementedError("V2 API implementation is not yet available") diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py new file mode 100644 index 000000000..f494fb9a3 --- /dev/null +++ b/openml/_api/resources/tasks.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import xmltodict + +from openml._api.resources.base import TasksAPI +from openml.tasks.task import ( + OpenMLClassificationTask, + OpenMLClusteringTask, + OpenMLLearningCurveTask, + OpenMLRegressionTask, + OpenMLTask, + TaskType, +) + +if TYPE_CHECKING: + from requests import Response + + +class TasksV1(TasksAPI): + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: + path = f"task/{task_id}" + response = self._http.get(path) + xml_content = response.text + task = self._create_task_from_xml(xml_content) + + if return_response: + return task, response + + return task + + def 
_create_task_from_xml(self, xml: str) -> OpenMLTask: + """Create a task given a xml string. + + Parameters + ---------- + xml : string + Task xml representation. + + Returns + ------- + OpenMLTask + """ + dic = xmltodict.parse(xml)["oml:task"] + estimation_parameters = {} + inputs = {} + # Due to the unordered structure we obtain, we first have to extract + # the possible keys of oml:input; dic["oml:input"] is a list of + # OrderedDicts + + # Check if there is a list of inputs + if isinstance(dic["oml:input"], list): + for input_ in dic["oml:input"]: + name = input_["@name"] + inputs[name] = input_ + # Single input case + elif isinstance(dic["oml:input"], dict): + name = dic["oml:input"]["@name"] + inputs[name] = dic["oml:input"] + + evaluation_measures = None + if "evaluation_measures" in inputs: + evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ + "oml:evaluation_measure" + ] + + task_type = TaskType(int(dic["oml:task_type_id"])) + common_kwargs = { + "task_id": dic["oml:task_id"], + "task_type": dic["oml:task_type"], + "task_type_id": task_type, + "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], + "evaluation_measure": evaluation_measures, + } + # TODO: add OpenMLClusteringTask? 
+ if task_type in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): + # Convert some more parameters + for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ + "oml:parameter" + ]: + name = parameter["@name"] + text = parameter.get("#text", "") + estimation_parameters[name] = text + + common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:type"] + common_kwargs["estimation_procedure_id"] = int( + inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"] + ) + + common_kwargs["estimation_parameters"] = estimation_parameters + common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][ + "oml:target_feature" + ] + common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:data_splits_url"] + + cls = { + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }.get(task_type) + if cls is None: + raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") + return cls(**common_kwargs) # type: ignore + + +class TasksV2(TasksAPI): + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: + raise NotImplementedError diff --git a/openml/_api/runtime/__init__.py b/openml/_api/runtime/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py new file mode 100644 index 000000000..2cf8c882e --- /dev/null +++ b/openml/_api/runtime/core.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from openml._api.config import ( + API_V1_SERVER, + API_V2_SERVER, +) +from openml._api.http.client import HTTPClient +from 
openml._api.resources import ( + DatasetsV1, + DatasetsV2, + EvaluationsV1, + EvaluationsV2, + TasksV1, + TasksV2, +) + +if TYPE_CHECKING: + from openml._api.resources.base import DatasetsAPI, EvaluationsAPI, TasksAPI + + +class APIBackend: + def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI, evaluations: EvaluationsAPI): + self.datasets = datasets + self.tasks = tasks + self.evaluations = evaluations + + +def build_backend(version: str, *, strict: bool) -> APIBackend: + v1_http = HTTPClient(API_V1_SERVER) + v2_http = HTTPClient(API_V2_SERVER) + + v1 = APIBackend( + datasets=DatasetsV1(v1_http), tasks=TasksV1(v1_http), evaluations=EvaluationsV1(v1_http) + ) + + if version == "v1": + return v1 + + v2 = APIBackend( + datasets=DatasetsV2(v2_http), tasks=TasksV2(v2_http), evaluations=EvaluationsV2(v2_http) + ) + + if strict: + return v2 + + return v1 + + +class APIContext: + def __init__(self) -> None: + self._backend = build_backend("v1", strict=False) + + def set_version(self, version: str, *, strict: bool = False) -> None: + self._backend = build_backend(version=version, strict=strict) + + @property + def backend(self) -> APIBackend: + return self._backend diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py new file mode 100644 index 000000000..1bc99d270 --- /dev/null +++ b/openml/_api/runtime/fallback.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openml._api.resources.base import ResourceAPI + + +class FallbackProxy: + def __init__(self, primary: ResourceAPI, fallback: ResourceAPI): + self._primary = primary + self._fallback = fallback diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index 7747294d7..3de1388ca 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -2,10 +2,9 @@ # ruff: noqa: PLR0913 from __future__ import annotations -import json from functools import partial from itertools 
import chain -from typing import Any +from typing import TYPE_CHECKING, Any from typing_extensions import Literal, overload import numpy as np @@ -15,7 +14,10 @@ import openml import openml._api_calls import openml.utils -from openml.evaluations import OpenMLEvaluation +from openml._api import api_context + +if TYPE_CHECKING: + from openml.evaluations import OpenMLEvaluation @overload @@ -223,54 +225,7 @@ def _list_evaluations( # noqa: C901 def __list_evaluations(api_call: str) -> list[OpenMLEvaluation]: """Helper function to parse API calls which are lists of runs""" - xml_string = openml._api_calls._perform_api_call(api_call, "get") - evals_dict = xmltodict.parse(xml_string, force_list=("oml:evaluation",)) - # Minimalistic check if the XML is useful - if "oml:evaluations" not in evals_dict: - raise ValueError( - "Error in return XML, does not contain " f'"oml:evaluations": {evals_dict!s}', - ) - - assert isinstance(evals_dict["oml:evaluations"]["oml:evaluation"], list), type( - evals_dict["oml:evaluations"], - ) - - uploader_ids = list( - {eval_["oml:uploader"] for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]}, - ) - api_users = "user/list/user_id/" + ",".join(uploader_ids) - xml_string_user = openml._api_calls._perform_api_call(api_users, "get") - - users = xmltodict.parse(xml_string_user, force_list=("oml:user",)) - user_dict = {user["oml:id"]: user["oml:username"] for user in users["oml:users"]["oml:user"]} - - evals = [] - for eval_ in evals_dict["oml:evaluations"]["oml:evaluation"]: - run_id = int(eval_["oml:run_id"]) - value = float(eval_["oml:value"]) if "oml:value" in eval_ else None - values = json.loads(eval_["oml:values"]) if eval_.get("oml:values", None) else None - array_data = eval_.get("oml:array_data") - - evals.append( - OpenMLEvaluation( - run_id=run_id, - task_id=int(eval_["oml:task_id"]), - setup_id=int(eval_["oml:setup_id"]), - flow_id=int(eval_["oml:flow_id"]), - flow_name=eval_["oml:flow_name"], - 
data_id=int(eval_["oml:data_id"]), - data_name=eval_["oml:data_name"], - function=eval_["oml:function"], - upload_time=eval_["oml:upload_time"], - uploader=int(eval_["oml:uploader"]), - uploader_name=user_dict[eval_["oml:uploader"]], - value=value, - values=values, - array_data=array_data, - ) - ) - - return evals + return api_context.backend.evaluations.list(api_call) def list_evaluation_measures() -> list[str]: diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index e9b879ae4..a794ad56d 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -12,6 +12,7 @@ import openml._api_calls import openml.utils +from openml._api import api_context from openml.datasets import get_dataset from openml.exceptions import OpenMLCacheException @@ -444,11 +445,16 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") + result = api_context.backend.tasks.get(task_id, return_response=True) - with xml_file.open("w", encoding="utf8") as fh: - fh.write(task_xml) - return _create_task_from_xml(task_xml) + if isinstance(result, tuple): + task, response = result + with xml_file.open("w", encoding="utf8") as fh: + fh.write(response.text) + else: + task = result + + return task def _create_task_from_xml(xml: str) -> OpenMLTask: