Skip to content

Commit d138f26

Browse files
authored
Simplify zenodo publishing (#1571)
* Simplified zenodo publishing * Fixed package uploading logic * Skip zenodo tests on windows * Fixed zenodo result.url * Fixed tests
1 parent 2c4ea30 commit d138f26

File tree

5 files changed

+197
-150
lines changed

5 files changed

+197
-150
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
interactions:
2+
- request:
3+
body: '{}'
4+
headers:
5+
Accept:
6+
- '*/*'
7+
Accept-Encoding:
8+
- gzip, deflate
9+
Connection:
10+
- keep-alive
11+
Content-Length:
12+
- '2'
13+
Content-Type:
14+
- application/json
15+
User-Agent:
16+
- python-requests/2.31.0
17+
method: POST
18+
uri: https://zenodo.org/api//deposit/depositions
19+
response:
20+
body:
21+
string: '{"message":"The server could not verify that you are authorized to
22+
access the URL requested. You either supplied the wrong credentials (e.g.
23+
a bad password), or your browser doesn''t understand how to supply the credentials
24+
required.","status":401}
25+
26+
'
27+
headers:
28+
Access-Control-Allow-Origin:
29+
- '*'
30+
Access-Control-Expose-Headers:
31+
- Content-Type, ETag, Link, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Reset
32+
Content-Length:
33+
- '248'
34+
Content-Type:
35+
- application/json
36+
Date:
37+
- Tue, 18 Jul 2023 16:28:55 GMT
38+
Referrer-Policy:
39+
- strict-origin-when-cross-origin
40+
Retry-After:
41+
- '60'
42+
Server:
43+
- nginx
44+
Strict-Transport-Security:
45+
- max-age=0
46+
X-Content-Type-Options:
47+
- nosniff
48+
X-Frame-Options:
49+
- sameorigin
50+
X-RateLimit-Limit:
51+
- '60'
52+
X-RateLimit-Remaining:
53+
- '59'
54+
X-RateLimit-Reset:
55+
- '1689697796'
56+
X-XSS-Protection:
57+
- 1; mode=block
58+
status:
59+
code: 401
60+
message: UNAUTHORIZED
61+
version: 1

frictionless/portals/zenodo/adapter.py

Lines changed: 76 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,19 @@
22

33
import datetime
44
import json
5-
import os
65
import tempfile
76
from pathlib import Path
8-
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
7+
from typing import TYPE_CHECKING, Any, Dict, List, Union, cast
98

10-
from ... import helpers, models
9+
from ... import models
1110
from ...catalog import Catalog, Dataset
1211
from ...exception import FrictionlessException
1312
from ...package import Package
1413
from ...platform import platform
1514
from ...resource import Resource
1615
from ...system import Adapter
1716
from .control import ZenodoControl
17+
from .models import ZenodoCreator, ZenodoMetadata
1818

1919
if TYPE_CHECKING:
2020
from pyzenodo3 import Record # type: ignore
@@ -52,83 +52,95 @@ def read_package(self) -> Package:
5252

5353
def write_package(self, package: Package):
5454
client = platform.pyzenodo3_upload
55+
client.BASE_URL = self.control.base_url
5556

56-
assert self.control.base_url
57-
assert self.control.apikey
58-
client.BASE_URL = self.control.base_url # type: ignore
59-
metafn = self.control.metafn
60-
61-
if not metafn:
62-
meta_data = generate_metadata(package)
63-
with tempfile.NamedTemporaryFile("wt", delete=False) as file:
64-
json.dump(meta_data, file, indent=2)
65-
metafn = file.name
66-
67-
if metafn:
68-
# Check if metadata is a JSON Object
69-
if isinstance(metafn, dict):
70-
meta_data = generate_metadata(metadata=metafn)
71-
with tempfile.NamedTemporaryFile("wt", delete=False) as file:
72-
json.dump(meta_data, file, indent=2)
73-
metafn = file.name
57+
# Ensure api key
58+
if not self.control.apikey:
59+
raise FrictionlessException("Api key is required for zenodo publishing")
7460

7561
try:
62+
# Ensure deposition
7663
deposition_id = self.control.deposition_id
7764
if not deposition_id:
78-
# Create a deposition resource
79-
deposition_id = client.create( # type: ignore
80-
token=self.control.apikey, base_url=self.control.base_url
65+
deposition_id = cast(
66+
int,
67+
client.create(
68+
token=self.control.apikey,
69+
base_url=self.control.base_url,
70+
),
8171
)
82-
metafn = Path(metafn).expanduser()
83-
client.upload_meta( # type: ignore
84-
token=self.control.apikey,
85-
metafn=metafn,
86-
depid=deposition_id, # type: ignore
87-
)
8872

89-
# Process resources
90-
resources: List[Path] = []
91-
for key, resource in enumerate(package.resources):
92-
if resource.data:
93-
resource_file_name = f"{resource.name}.json" or f"resource{key}.json"
94-
resource_path = os.path.join(
95-
self.control.tmp_path or "", resource_file_name
73+
# Generate metadata
74+
if self.control.metafn:
75+
descriptor = json.loads(Path(self.control.metafn).read_text())
76+
meta = descriptor.get("metadata", {})
77+
meta.setdefault("publication_date", str(datetime.date.today()))
78+
meta.setdefault("access_right", "open")
79+
metadata = ZenodoMetadata(**meta)
80+
else:
81+
description = self.control.description or package.description or "About"
82+
license = "CC-BY-4.0"
83+
if package.licenses:
84+
license = package.licenses[0].get("name", license)
85+
metadata = ZenodoMetadata(
86+
title=self.control.title or package.title or "Title",
87+
description=description,
88+
license=license,
89+
publication_date=str(datetime.date.today()),
90+
)
91+
if self.control.author:
92+
metadata.creators.append(
93+
ZenodoCreator(
94+
name=self.control.author,
95+
affiliation=self.control.company,
96+
)
9697
)
97-
resource.to_json(resource_path)
98-
resources.append(Path(resource_path).expanduser())
99-
continue
100-
101-
resource_path = resource.path or ""
102-
if resource_path.startswith(("http://", "https://")):
103-
continue
104-
105-
if resource.basepath:
106-
resource_path = os.path.join(
107-
str(resource.basepath), str(resource.path)
98+
for contributor in package.contributors:
99+
metadata.creators.append(
100+
ZenodoCreator(
101+
name=contributor.get("title", "Title"),
102+
affiliation=contributor.get("organization"),
103+
)
108104
)
109-
resources.append(Path(resource_path).expanduser())
110-
package_path = os.path.join(self.control.tmp_path or "", "datapackage.json")
111-
package.to_json(package_path)
112105

113-
# Upload package and resources
114-
client.upload_data( # type: ignore
115-
token=self.control.apikey,
116-
datafn=Path(package_path).expanduser(),
117-
depid=deposition_id, # type: ignore
118-
base_url=self.control.base_url,
119-
)
120-
for resource_path in resources:
121-
resource_path = Path(resource_path).expanduser()
122-
client.upload_data( # type: ignore
106+
# Upload metadata
107+
with tempfile.NamedTemporaryFile("wt") as file:
108+
data = dict(metadata=metadata.model_dump(exclude_none=True))
109+
json.dump(data, file, indent=2)
110+
file.flush()
111+
client.upload_meta(
123112
token=self.control.apikey,
124-
datafn=resource_path,
125-
depid=deposition_id, # type: ignore
113+
metafn=file.name,
114+
depid=deposition_id,
115+
)
116+
117+
# Upload package
118+
with tempfile.TemporaryDirectory() as dir:
119+
path = Path(dir) / "datapackage.json"
120+
package.to_json(str(path))
121+
client.upload_data(
122+
token=self.control.apikey,
123+
datafn=path,
124+
depid=deposition_id,
126125
base_url=self.control.base_url,
127126
)
127+
128+
# Upload resource
129+
for resource in package.resources:
130+
if resource.normpath and not resource.remote:
131+
client.upload_data(
132+
token=self.control.apikey,
133+
datafn=Path(resource.normpath),
134+
depid=deposition_id,
135+
base_url=self.control.base_url,
136+
)
137+
138+
# Return result
128139
return models.PublishResult(
129-
url=f"https://zenodo.org/record/{deposition_id}",
140+
url=f"https://zenodo.org/deposit/{deposition_id}",
130141
context=dict(deposition_id=deposition_id),
131142
)
143+
132144
except Exception as exception:
133145
note = "Zenodo API error" + repr(exception)
134146
raise FrictionlessException(note)
@@ -226,60 +238,3 @@ def get_package(record: Record, title: str, formats: List[str]) -> Package: # t
226238
resource = Resource(path=file["key"]) # type: ignore
227239
package.add_resource(resource)
228240
return package
229-
230-
231-
def generate_metadata(
232-
package: Optional[Package] = None, *, metadata: Optional[Dict[str, Any]] = None
233-
) -> Dict[str, Any]:
234-
meta_data: Union[str, Dict[str, Any], None] = {"metadata": {}}
235-
if not metadata and not package:
236-
note = "Zenodo API Metadata Creation error: Either metadata or package should be provided to generate metadata."
237-
raise FrictionlessException(note)
238-
239-
if metadata:
240-
if (
241-
not metadata.get("title")
242-
or not metadata.get("description")
243-
or not metadata.get("creators")
244-
):
245-
note = "Zenodo API Metadata Creation error: missing title or description or creators."
246-
raise FrictionlessException(note)
247-
248-
meta_data["metadata"] = metadata
249-
if "keywords" not in meta_data["metadata"]:
250-
meta_data["metadata"]["keywords"] = ["frictionlessdata"]
251-
252-
return helpers.remove_non_values(meta_data)
253-
254-
assert package
255-
256-
if not package.title or not package.description or not package.contributors:
257-
note = "Zenodo API Metadata Creation error: Unable to read title or description or contributors from package descriptor."
258-
raise FrictionlessException(note)
259-
260-
meta_data["metadata"] = {
261-
"title": package.title,
262-
"description": package.description,
263-
"publication_date": package.created or str(datetime.datetime.now()),
264-
"upload_type": "dataset",
265-
"access_right": "open",
266-
}
267-
if package.licenses:
268-
meta_data["metadata"]["creators"] = package.licenses[0].get("name") # type: ignore
269-
270-
creators: List[Dict[str, Any]] = []
271-
for contributor in package.contributors:
272-
creators.append(
273-
{
274-
"name": contributor.get("title"),
275-
"affiliation": contributor.get("organization"),
276-
}
277-
)
278-
keywords = package.keywords or []
279-
if "frictionlessdata" not in package.keywords:
280-
keywords.append("frictionlessdata")
281-
282-
if creators:
283-
meta_data["metadata"]["creators"] = creators # type: ignore
284-
meta_data["metadata"]["keywords"] = keywords # type: ignore
285-
return helpers.remove_non_values(meta_data)

frictionless/portals/zenodo/control.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,16 @@ class ZenodoControl(Control):
4141
token has to have deposit:write access.
4242
"""
4343

44-
base_url: Optional[str] = BASE_URL
44+
base_url: str = BASE_URL
4545
"""Endpoint for zenodo. By default it is set to live site (https://zenodo.org/api). For testing upload,
4646
we can use sandbox for example, https://sandbox.zenodo.org/api. Sandbox does not work for
4747
reading."""
4848

49+
title: Optional[str] = None
50+
description: Optional[str] = None
51+
author: Optional[str] = None
52+
company: Optional[str] = None
53+
4954
bounds: Optional[str] = None
5055
"""Return records filtered by a geolocation bounding box.
5156
For example, (Format bounds=143.37158,-38.99357,146.90918,-37.35269)"""
@@ -70,6 +75,7 @@ class ZenodoControl(Control):
7075
name: Optional[str] = None
7176
"""Custom name for a catalog or a package. Default name is 'catalog' or 'package'"""
7277

78+
# TODO: remove
7379
metafn: Optional[str] = None
7480
"""Metadata file path for deposition resource. Deposition resource is used for uploading
7581
and editing records on Zenodo."""
@@ -114,7 +120,10 @@ class ZenodoControl(Control):
114120
"all_versions": {"type": "integer"},
115121
"apikey": {"type": "string"},
116122
"base_url": {"type": "string"},
117-
"bounds": {"type": "string"},
123+
"title": {"type": "string"},
124+
"description": {"type": "string"},
125+
"author": {"type": "string"},
126+
"company": {"type": "string"},
118127
"communities": {"type": "string"},
119128
"deposition_id": {"type": "integer"},
120129
"doi": {"type": "string"},
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from __future__ import annotations
2+
3+
from typing import List, Literal, Optional
4+
5+
from pydantic import BaseModel
6+
7+
8+
class ZenodoCreator(BaseModel):
9+
name: str
10+
affiliation: Optional[str] = None
11+
orcid: Optional[str] = None
12+
13+
14+
class ZenodoMetadata(BaseModel):
15+
title: str
16+
description: str
17+
publication_date: str
18+
license: Optional[str] = None
19+
upload_type: Literal["dataset"] = "dataset"
20+
access_right: Literal["open"] = "open"
21+
creators: List[ZenodoCreator] = []

0 commit comments

Comments
 (0)