Skip to content

Commit 647b465

Browse files
committed
Merge branch 'develop'
2 parents 902c427 + d204131 commit 647b465

File tree

10 files changed

+433
-116
lines changed

10 files changed

+433
-116
lines changed

CHANGES.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,31 @@ Changelog
22
=========
33

44

5+
.. _changes-1_5_0:
6+
7+
1.5.0 (2024-10-11)
8+
~~~~~~~~~~~~~~~~~~
9+
10+
New features
11+
------------
12+
13+
+ `#160`_, `#161`_, `#163`_: Add class attributes to
14+
:class:`icat.ingest.IngestReader` to make some prescribed values in
15+
the transformation to ICAT data file format configurable.
16+
17+
Bug fixes and minor changes
18+
---------------------------
19+
20+
+ `#162`_: Minor updates in the tool chain
21+
+ `#164`_: Fix `dumpinvestigation.py` example script
22+
23+
.. _#160: https://github.com/icatproject/python-icat/issues/160
24+
.. _#161: https://github.com/icatproject/python-icat/pull/161
25+
.. _#162: https://github.com/icatproject/python-icat/pull/162
26+
.. _#163: https://github.com/icatproject/python-icat/pull/163
27+
.. _#164: https://github.com/icatproject/python-icat/pull/164
28+
29+
530
.. _changes-1_4_0:
631

732
1.4.0 (2024-08-30)

doc/examples/dumpinvestigation.py

Lines changed: 163 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -17,130 +17,192 @@
1717

1818
logging.basicConfig(level=logging.INFO)
1919

20-
formats = icat.dumpfile.Backends.keys()
21-
config = icat.config.Config()
22-
config.add_variable('file', ("-o", "--outputfile"),
23-
dict(help="output file name or '-' for stdout"),
24-
default='-')
25-
config.add_variable('format', ("-f", "--format"),
26-
dict(help="output file format", choices=formats),
27-
default='YAML')
28-
config.add_variable('investigation', ("investigation",),
29-
dict(help="name and optionally visit id "
30-
"(separated by a colon) of the investigation"))
31-
client, conf = config.getconfig()
32-
33-
if client.apiversion < '4.4':
34-
raise RuntimeError("Sorry, ICAT version %s is too old, need 4.4.0 or newer."
35-
% client.apiversion)
36-
client.login(conf.auth, conf.credentials)
37-
38-
3920
# ------------------------------------------------------------
4021
# helper
4122
# ------------------------------------------------------------
4223

43-
def getinvestigation(invid):
24+
def get_investigation_id(client, invid):
4425
"""Search the investigation id from name and optionally visitid."""
26+
query = Query(client, "Investigation", attributes=["id"])
4527
l = invid.split(':')
46-
if len(l) == 1:
47-
# No colon, invid == name
48-
searchexp = "Investigation.id [name='%s']" % tuple(l)
49-
elif len(l) == 2:
28+
query.addConditions({"name": "= '%s'" % l[0]})
29+
if len(l) == 2:
5030
# one colon, invid == name:visitId
51-
searchexp = "Investigation.id [name='%s' AND visitId='%s']" % tuple(l)
31+
query.addConditions({"visitId": "= '%s'" % l[1]})
5232
elif len(l) > 2:
5333
# too many colons
5434
raise RuntimeError("Invalid investigation identifier '%s'" % invid)
55-
return (client.assertedSearch(searchexp)[0])
35+
return client.assertedSearch(query)[0]
5636

57-
def mergesearch(sexps):
37+
def mergesearch(client, queries):
5838
"""Do many searches and merge the results in one list excluding dups."""
5939
objs = set()
60-
for se in sexps:
40+
for se in queries:
6141
objs.update(client.search(se))
6242
return list(objs)
6343

44+
# The following helper functions control what ICAT objects are written
45+
# in each of the dumpfile chunks. There are three options for the
46+
# items in each list: either queries expressed as Query objects, or
47+
# queries expressed as string expressions, or lists of objects. In
48+
# the first two cases, the search results will be written, in the last
49+
# case, the objects are written as provided.
50+
51+
def get_auth_types(client, invid):
52+
"""Users and groups related to the investigation.
53+
"""
54+
# We need the users related to our investigation via
55+
# InvestigationUser, the users who are members of one of the groups related
56+
# via InvestigationGroup, and the instrument scientists from the
57+
# instruments related to the investigation.  These are
58+
# independent searches, but the results are likely to overlap. So
59+
# we need to search and merge results first.
60+
usersearch = [
61+
Query(client, "User", conditions={
62+
"investigationUsers."
63+
"investigation.id": "= %d" % invid,
64+
}),
65+
Query(client, "User", conditions={
66+
"userGroups.grouping.investigationGroups."
67+
"investigation.id": "= %d" % invid,
68+
}),
69+
Query(client, "User", conditions={
70+
"instrumentScientists.instrument.investigationInstruments."
71+
"investigation.id": "= %d" % invid,
72+
}),
73+
]
74+
return [
75+
mergesearch(client, usersearch),
76+
Query(client, "Grouping", conditions={
77+
"investigationGroups.investigation.id": "= %d" % invid,
78+
}, includes=["userGroups.user"], aggregate="DISTINCT", order=True),
79+
]
80+
81+
def get_static_types(client, invid):
82+
"""Static stuff that exists independently of the investigation in ICAT.
83+
"""
84+
# Similar situation for ParameterType as for User: need to merge
85+
# ParameterType used for InvestigationParameter, SampleParameter,
86+
# DatasetParameter, and DatafileParameter.
87+
ptsearch = [
88+
Query(client, "ParameterType", conditions={
89+
"investigationParameters."
90+
"investigation.id": "= %d" % invid,
91+
}, includes=["facility", "permissibleStringValues"]),
92+
Query(client, "ParameterType", conditions={
93+
"sampleParameters.sample."
94+
"investigation.id": "= %d" % invid,
95+
}, includes=["facility", "permissibleStringValues"]),
96+
Query(client, "ParameterType", conditions={
97+
"datasetParameters.dataset."
98+
"investigation.id": "= %d" % invid,
99+
}, includes=["facility", "permissibleStringValues"]),
100+
Query(client, "ParameterType", conditions={
101+
"datafileParameters.datafile.dataset."
102+
"investigation.id": "= %d" % invid,
103+
}, includes=["facility", "permissibleStringValues"]),
104+
]
105+
return [
106+
Query(client, "Facility",
107+
conditions={
108+
"investigations.id": "= %d" % invid,
109+
},
110+
order=True),
111+
Query(client, "Instrument",
112+
conditions={
113+
"investigationInstruments.investigation.id": "= %d" % invid,
114+
},
115+
includes=["facility", "instrumentScientists.user"],
116+
order=True),
117+
mergesearch(client, ptsearch),
118+
Query(client, "InvestigationType",
119+
conditions={
120+
"investigations.id": "= %d" % invid,
121+
},
122+
includes=["facility"],
123+
order=True),
124+
Query(client, "SampleType",
125+
conditions={
126+
"samples.investigation.id": "= %d" % invid,
127+
},
128+
includes=["facility"],
129+
aggregate="DISTINCT",
130+
order=True),
131+
Query(client, "DatasetType",
132+
conditions={
133+
"datasets.investigation.id": "= %d" % invid,
134+
},
135+
includes=["facility"],
136+
aggregate="DISTINCT",
137+
order=True),
138+
Query(client, "DatafileFormat",
139+
conditions={
140+
"datafiles.dataset.investigation.id": "= %d" % invid,
141+
},
142+
includes=["facility"],
143+
aggregate="DISTINCT",
144+
order=True),
145+
]
146+
147+
def get_investigation_types(client, invid):
148+
"""The investigation and all the stuff that belongs to it.
149+
"""
150+
# The set of objects to be included in the Investigation.
151+
inv_includes = {
152+
"facility", "type.facility", "investigationInstruments",
153+
"investigationInstruments.instrument.facility", "shifts",
154+
"keywords", "publications", "investigationUsers",
155+
"investigationUsers.user", "investigationGroups",
156+
"investigationGroups.grouping", "parameters",
157+
"parameters.type.facility"
158+
}
159+
return [
160+
Query(client, "Investigation",
161+
conditions={"id":"in (%d)" % invid},
162+
includes=inv_includes),
163+
Query(client, "Sample",
164+
conditions={"investigation.id":"= %d" % invid},
165+
includes={"investigation", "type.facility",
166+
"parameters", "parameters.type.facility"},
167+
order=True),
168+
Query(client, "Dataset",
169+
conditions={"investigation.id":"= %d" % invid},
170+
includes={"investigation", "type.facility", "sample",
171+
"parameters", "parameters.type.facility"},
172+
order=True),
173+
Query(client, "Datafile",
174+
conditions={"dataset.investigation.id":"= %d" % invid},
175+
includes={"dataset", "datafileFormat.facility",
176+
"parameters", "parameters.type.facility"},
177+
order=True)
178+
]
64179

65180
# ------------------------------------------------------------
66181
# Do it
67182
# ------------------------------------------------------------
68183

69-
invid = getinvestigation(conf.investigation)
70-
184+
formats = icat.dumpfile.Backends.keys()
185+
config = icat.config.Config()
186+
config.add_variable('file', ("-o", "--outputfile"),
187+
dict(help="output file name or '-' for stdout"),
188+
default='-')
189+
config.add_variable('format', ("-f", "--format"),
190+
dict(help="output file format", choices=formats),
191+
default='YAML')
192+
config.add_variable('investigation', ("investigation",),
193+
dict(help="name and optionally visit id "
194+
"(separated by a colon) of the investigation"))
195+
client, conf = config.getconfig()
71196

72-
# We need the users related to our investigation via
73-
# InvestigationUser, the users member of one of the groups related via
74-
# InvestigationGroup, and the instrument scientists from the
75-
# instruments related to the investigations. These are independent
76-
# searches, but the results are likely to overlap. So we need to
77-
# search and merge results first. Similar situation for ParameterType.
78-
usersearch = [("User <-> InvestigationUser <-> Investigation [id=%d]"),
79-
("User <-> UserGroup <-> Grouping <-> InvestigationGroup "
80-
"<-> Investigation [id=%d]"),
81-
("User <-> InstrumentScientist <-> Instrument "
82-
"<-> InvestigationInstrument <-> Investigation [id=%d]")]
83-
ptsearch = [("ParameterType INCLUDE Facility, PermissibleStringValue "
84-
"<-> InvestigationParameter <-> Investigation [id=%d]"),
85-
("ParameterType INCLUDE Facility, PermissibleStringValue "
86-
"<-> SampleParameter <-> Sample <-> Investigation [id=%d]"),
87-
("ParameterType INCLUDE Facility, PermissibleStringValue "
88-
"<-> DatasetParameter <-> Dataset <-> Investigation [id=%d]"),
89-
("ParameterType INCLUDE Facility, PermissibleStringValue "
90-
"<-> DatafileParameter <-> Datafile <-> Dataset "
91-
"<-> Investigation [id=%d]"), ]
197+
if client.apiversion < '4.4':
198+
raise RuntimeError("Sorry, ICAT version %s is too old, need 4.4.0 or newer."
199+
% client.apiversion)
200+
client.login(conf.auth, conf.credentials)
92201

93-
# The set of objects to be included in the Investigation.
94-
inv_includes = { "facility", "type.facility", "investigationInstruments",
95-
"investigationInstruments.instrument.facility", "shifts",
96-
"keywords", "publications", "investigationUsers",
97-
"investigationUsers.user", "investigationGroups",
98-
"investigationGroups.grouping", "parameters",
99-
"parameters.type.facility" }
100202

101-
# The following lists control what ICAT objects are written in each of
102-
# the dumpfile chunks. There are three options for the items in each
103-
# list: either queries expressed as Query objects, or queries
104-
# expressed as string expressions, or lists of objects. In the first
105-
# two cases, the seacrh results will be written, in the last case, the
106-
# objects are written as provided. We assume that there is only one
107-
# relevant facility, e.g. that all objects related to the
108-
# investigation are related to the same facility. We may thus ommit
109-
# the facility from the ORDER BY clauses.
110-
authtypes = [mergesearch([s % invid for s in usersearch]),
111-
("Grouping ORDER BY name INCLUDE UserGroup, User "
112-
"<-> InvestigationGroup <-> Investigation [id=%d]" % invid)]
113-
statictypes = [("Facility ORDER BY name"),
114-
("Instrument ORDER BY name "
115-
"INCLUDE Facility, InstrumentScientist, User "
116-
"<-> InvestigationInstrument <-> Investigation [id=%d]"
117-
% invid),
118-
(mergesearch([s % invid for s in ptsearch])),
119-
("InvestigationType ORDER BY name INCLUDE Facility "
120-
"<-> Investigation [id=%d]" % invid),
121-
("SampleType ORDER BY name, molecularFormula INCLUDE Facility "
122-
"<-> Sample <-> Investigation [id=%d]" % invid),
123-
("DatasetType ORDER BY name INCLUDE Facility "
124-
"<-> Dataset <-> Investigation [id=%d]" % invid),
125-
("DatafileFormat ORDER BY name, version INCLUDE Facility "
126-
"<-> Datafile <-> Dataset <-> Investigation [id=%d]" % invid)]
127-
investtypes = [Query(client, "Investigation",
128-
conditions={"id":"in (%d)" % invid},
129-
includes=inv_includes),
130-
Query(client, "Sample", order=["name"],
131-
conditions={"investigation.id":"= %d" % invid},
132-
includes={"investigation", "type.facility",
133-
"parameters", "parameters.type.facility"}),
134-
Query(client, "Dataset", order=["name"],
135-
conditions={"investigation.id":"= %d" % invid},
136-
includes={"investigation", "type.facility", "sample",
137-
"parameters", "parameters.type.facility"}),
138-
Query(client, "Datafile", order=["dataset.name", "name"],
139-
conditions={"dataset.investigation.id":"= %d" % invid},
140-
includes={"dataset", "datafileFormat.facility",
141-
"parameters", "parameters.type.facility"})]
203+
invid = get_investigation_id(client, conf.investigation)
142204

143205
with open_dumpfile(client, conf.file, conf.format, 'w') as dumpfile:
144-
dumpfile.writedata(authtypes)
145-
dumpfile.writedata(statictypes)
146-
dumpfile.writedata(investtypes)
206+
dumpfile.writedata(get_auth_types(client, invid))
207+
dumpfile.writedata(get_static_types(client, invid))
208+
dumpfile.writedata(get_investigation_types(client, invid))

doc/examples/ingest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
The script takes the name of an investigation as argument. The
2525
investigation MUST exist in ICAT beforehand and all datasets in the
2626
input directory MUST belong to this investigation. The script will
27-
create tha datasets in ICAT, e.g. they MUST NOT exist in ICAT
27+
create the datasets in ICAT, i.e. they MUST NOT exist in ICAT
2828
beforehand. The metadata input file may contain attributes and
2929
related objects (datasetInstrument, datasetTechnique,
3030
datasetParameter) for the datasets provided in the input directory.

doc/src/conf.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# full list see the documentation:
77
# http://www.sphinx-doc.org/en/master/config
88

9+
import os
910
from pathlib import Path
1011
import sys
1112

@@ -135,6 +136,15 @@ def make_meta_rst(last_release):
135136
#
136137
# html_theme_options = {}
137138

139+
# Define the canonical URL if you are using a custom domain on Read the Docs
140+
html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
141+
142+
# Tell Jinja2 templates the build is running on Read the Docs
143+
if os.environ.get("READTHEDOCS", "") == "True":
144+
if "html_context" not in globals():
145+
html_context = {}
146+
html_context["READTHEDOCS"] = True
147+
138148
# Add any paths that contain custom static files (such as style sheets) here,
139149
# relative to this directory. They are copied after the builtin static files,
140150
# so a file named "default.css" will overwrite the builtin "default.css".

doc/src/ingest.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@ format of the input files may be customized to some extent by
2323
providing custom versions of XSD and XSLT files, see
2424
:ref:`ingest-customize` below.
2525

26+
Some attributes and relations of the ``Dataset`` objects are
27+
prescribed during the transformation into ICAT data file format,
28+
namely the ``complete`` attribute and the name of the ``DatasetType``
29+
to relate them to. The prescribed values are set in class attributes
30+
:attr:`~icat.ingest.IngestReader.Dataset_complete` and
31+
:attr:`~icat.ingest.IngestReader.DatasetType_name` respectively. They
32+
may be customized by overriding these class attributes.
33+
2634
The ``Dataset`` objects in the input will not be created by
2735
:class:`~icat.ingest.IngestReader`, because it is assumed that a
2836
separate workflow in the caller will copy the content of datafiles to

etc/ingest.xslt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,22 @@
2323
<xsl:template match="/icatingest/data/dataset">
2424
<dataset>
2525
<xsl:copy-of select="@id"/>
26-
<complete>false</complete>
26+
<xsl:element name="complete">
27+
<xsl:value-of
28+
select="/icatingest/_environment/@dataset_complete"/>
29+
</xsl:element>
2730
<xsl:copy-of select="description"/>
2831
<xsl:copy-of select="endDate"/>
2932
<xsl:copy-of select="name"/>
3033
<xsl:copy-of select="startDate"/>
3134
<investigation ref="_Investigation"/>
3235
<xsl:apply-templates select="sample"/>
33-
<type name="raw"/>
36+
<xsl:element name="type">
37+
<xsl:attribute name="name">
38+
<xsl:value-of
39+
select="/icatingest/_environment/@datasettype_name"/>
40+
</xsl:attribute>
41+
</xsl:element>
3442
<xsl:copy-of select="datasetInstruments"/>
3543
<xsl:copy-of select="datasetTechniques"/>
3644
<xsl:copy-of select="parameters"/>

0 commit comments

Comments
 (0)