Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions datahugger/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datahugger.services import DataEuropaDataset
from datahugger.services import DataOneDataset
from datahugger.services import DataverseDataset
from datahugger.services import DESYDataset
from datahugger.services import DjehutyDataset
from datahugger.services import DSpaceDataset
from datahugger.services import FigShareDataset
Expand Down Expand Up @@ -121,6 +122,7 @@
"www.uni-hildesheim.de": DataverseDataset,
"b2share.eudat.eu": B2shareDataset,
"data.europa.eu": DataEuropaDataset,
"public-doi.desy.de": DESYDataset,
}

# regexp lookup
Expand Down
37 changes: 37 additions & 0 deletions datahugger/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,40 @@ class B2shareDataset(DatasetDownloader):
ATTR_SIZE_JSONPATH = "size"
ATTR_HASH_JSONPATH = "checksum"
ATTR_HASH_TYPE_VALUE = "md5"


class DESYDataset(DatasetDownloader):
    """Downloader for DESY repository.

    The file listing of a record is obtained by requesting the record's
    detail page with a Metalink 4 ``Accept`` header and parsing the XML
    document that comes back.
    """

    REGEXP_ID = r"https://public-doi\.desy\.de/detail/(?P<record_id>.+)"

    # the base entry point of the REST API
    API_URL = "https://public-doi.desy.de/detail/"

    # XML namespace of the Metalink 4 document returned by the repository
    METALINK_NS = {"ml": "urn:ietf:params:xml:ns:metalink"}

    # seconds to wait for the repository before giving up; without a timeout
    # a stalled server would block the caller indefinitely
    REQUEST_TIMEOUT = 60

    @property
    def files(self):
        """Return the list of files of the record, fetching it on first use.

        Each entry is a dict with the keys ``link`` and ``name`` and, when
        the metalink document provides them, ``size`` (int), ``hash`` and
        ``hash_type``. The result is cached on the instance as ``_files``.

        Raises
        ------
        requests.HTTPError
            If the repository answers with an error status.
        requests.Timeout
            If the repository does not answer within ``REQUEST_TIMEOUT``.
        xml.etree.ElementTree.ParseError
            If the response body is not well-formed XML.
        """
        if hasattr(self, "_files"):
            return self._files

        # NOTE(review): self._params["record_id"] is presumably filled in by
        # the base class from REGEXP_ID -- confirm against DatasetDownloader.
        res = requests.get(
            self.API_URL + self._params["record_id"],
            headers={"Accept": "application/metalink4+xml"},
            timeout=self.REQUEST_TIMEOUT,
        )
        res.raise_for_status()

        # Hand the raw bytes to the parser so that it honours the encoding
        # declared in the XML prolog instead of assuming UTF-8.
        meta_tree = ET.fromstring(res.content)

        ns = self.METALINK_NS
        file_entries = []
        for file_elem in meta_tree.findall("ml:file", ns):
            url_elem = file_elem.find("ml:url", ns)
            link = (url_elem.text or "").strip() if url_elem is not None else ""
            if not link:
                # An entry without a download URL cannot be fetched; skip it
                # rather than failing with an AttributeError on ``.text``.
                continue

            entry = {
                "link": link,
                "name": file_elem.attrib.get("name"),
            }

            size_elem = file_elem.find("ml:size", ns)
            if size_elem is not None:
                entry["size"] = int(size_elem.text)

            hash_elem = file_elem.find("ml:hash", ns)
            if hash_elem is not None:
                entry["hash"] = hash_elem.text
                entry["hash_type"] = hash_elem.attrib.get("type")

            file_entries.append(entry)

        self._files = file_entries
        return self._files
4 changes: 4 additions & 0 deletions tests/test_repositories.toml
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,7 @@ files = "consolidation-wattzhub-schema-irve-statique-20240220-152202.csv"
[[seanoe]]
location = "https://doi.org/10.17882/101042"
files = "111609.xlsx"

[[desy]]
location = "https://doi.org/10.60717/17114434-6bb9-4f3c-865f-59395b7c38c3"
files = "conda_env.yml"
Loading