#! /usr/bin/python3
"""Ingest metadata into ICAT.

This script demonstrates how to use class IngestReader from the
icat.ingest module to read metadata from a file and add it to ICAT.
The script is intended to model the use case of ingesting raw
datasets from the experiment.

The script expects an input directory containing one metadata input
file and one subdirectory for each of the datasets, e.g. something
like::

  input_dir
  ├── metadata.xml
  ├── dataset_1
  │   ├── datafile_a.dat
  │   ├── datafile_b.dat
  │   └── datafile_c.dat
  └── dataset_2
      ├── datafile_d.dat
      ├── datafile_e.dat
      └── datafile_f.dat

The script takes the name of an investigation and the path to the
input directory as arguments. The investigation MUST exist in ICAT
beforehand and all datasets in the input directory MUST belong to
this investigation. The script will create the datasets in ICAT,
i.e. they MUST NOT exist in ICAT beforehand. The metadata input file
may contain attributes and related objects (datasetInstrument,
datasetTechnique, datasetParameter) for the datasets provided in the
input directory. The metadata input is restricted in that sense,
i.e. this script enforces that the metadata does not contain any
other input.

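A typical invocation might look like this, assuming the script is
saved as ingest.py; the investigation name and directory path are
made up, and the ICAT server URL and login credentials are taken from
the usual icat.config command line options or configuration file::

  ingest.py -s myicat_ingest 12345-EF-1-1 /data/ingest/12345-EF-1-1
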
The XML Schema Definition and XSL Transformation files (ingest.xsd and
ingest.xslt) provided by python-icat (or customized versions thereof)
need to be installed so that class IngestReader will find them
(e.g. in the IngestReader.SchemaDir directory).

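If these files are installed in a non-standard location, one might
point IngestReader at the directory containing them before use, for
instance (the path shown here is only a placeholder)::

  from pathlib import Path
  from icat.ingest import IngestReader
  IngestReader.SchemaDir = Path("/usr/share/icat/ingest")
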
There are some limitations to keep things simple:

* the script creates the dataset and datafile objects in ICAT, but
  does not upload the file content to IDS. In a real production
  workflow, you'd probably have a separate step that copies the files
  to the storage managed by IDS while creating the dataset and
  datafile objects in ICAT at the same time (see the sketch after
  this list).

* the script does not add a datafileFormat or any descriptive
  attributes (fileSize, checksum, datafileModTime) to the datafiles
  it creates.

* it is assumed that the investigation can be unambiguously found by
  its name.

* a real production workflow would probably apply much stricter
  conformance checks on the input (e.g. restrictions on allowed
  dataset or datafile names, making sure not to follow any symlinks
  from the input directory) and have more elaborate error handling.

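For reference, uploading the file content to IDS might look roughly
like the following sketch, where f would be the path of the file to
upload and df_format a suitable DatafileFormat object, neither of
which this script provides::

  datafile = client.new("datafile", name=f.name, dataset=dataset,
                        datafileFormat=df_format)
  client.putData(str(f), datafile)
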
"""

import logging
from pathlib import Path
import icat
import icat.config
from icat.ingest import IngestReader
from icat.query import Query


logging.basicConfig(level=logging.DEBUG)
# Silence some rather chatty modules.
logging.getLogger('suds.client').setLevel(logging.CRITICAL)
logging.getLogger('suds').setLevel(logging.ERROR)

logger = logging.getLogger(__name__)


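# Set up client and configuration: icat.config provides the standard
# command line options and config file handling.  ids=False because this
# script does not upload any file content to IDS (see the docstring).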
config = icat.config.Config(ids=False)
config.add_variable('investigation', ("investigation",),
                    dict(help="name of the investigation"))
config.add_variable('inputdir', ("inputdir",),
                    dict(help="path to the input directory"),
                    type=Path)
client, conf = config.getconfig()
client.login(conf.auth, conf.credentials)

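# Look up the investigation by name.  assertedSearch() raises an error
# unless the search yields exactly one result.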
query = Query(client, "Investigation", conditions={
    "name": "= '%s'" % conf.investigation
})
investigation = client.assertedSearch(query)[0]


class ContentError(RuntimeError):
    """Some invalid content in the input directory.
    """
    def __init__(self, base, p, msg):
        p = p.relative_to(base)
        super().__init__("%s: %s" % (p, msg))


def check(client, path, investigation):
    """Verify the content of the input directory.

    The idea is to check the input directory for conformance as much
    as possible and to fail early if anything is not as required,
    before having committed anything to ICAT.

    Returns a tuple with two items: a list of datasets and an
    IngestReader.
    """
    datasets = []
    metadata_path = path / "metadata.xml"
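    # Walk the input directory: each subdirectory becomes a dataset and
    # each regular file therein a datafile; anything else, apart from
    # the metadata file itself and hidden files, is rejected.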
    for p0 in path.iterdir():
        if p0.name.startswith('.') or p0 == metadata_path:
            continue
        elif p0.is_dir():
            is_empty = True
            dataset = client.new("dataset")
            dataset.name = p0.name
            dataset.complete = False
            for p1 in p0.iterdir():
                if p1.is_file():
                    is_empty = False
                    datafile = client.new("datafile")
                    datafile.name = p1.name
                    dataset.datafiles.append(datafile)
                else:
                    raise ContentError(path, p1, 'unexpected item')
            if is_empty:
                raise ContentError(path, p0, 'empty dataset directory')
            datasets.append(dataset)
        else:
            raise ContentError(path, p0, 'unexpected item')
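    # Check the metadata against the datasets found on disk without
    # writing anything to ICAT yet: dry_run=True means nothing is
    # committed, update_ds=True sets the attributes and related objects
    # read from the metadata on the local dataset objects.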
    try:
        reader = IngestReader(client, metadata_path, investigation)
        reader.ingest(datasets, dry_run=True, update_ds=True)
    except (icat.InvalidIngestFileError, icat.SearchResultError) as e:
        raise ContentError(path, metadata_path,
                           "%s: %s" % (type(e).__name__, e))
    return (datasets, reader)


logger.info("ingesting from directory %s into investigation %s",
            conf.inputdir, investigation.name)
datasets, reader = check(client, conf.inputdir, investigation)
logger.debug("input directory checked, found %d datasets", len(datasets))
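# Create the dataset (and datafile) objects in ICAT; as stated in the
# module docstring, they must not exist beforehand.
# truncateRelations(keepInstRel=True) then trims related objects from
# the local copies again before they are passed to reader.ingest().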
for ds in datasets:
    ds.create()
    ds.truncateRelations(keepInstRel=True)
    logger.debug("created dataset %s", ds.name)
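# Now feed the metadata from the input file into ICAT for the newly
# created datasets, i.e. the related objects (datasetInstrument,
# datasetTechnique, datasetParameter) read from metadata.xml.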
reader.ingest(datasets)
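# All metadata is in, finally flag the datasets as complete.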
for ds in datasets:
    ds.complete = True
    ds.update()
logger.debug("ingest done")