From 10fa894501d6b1eb161373b789a417cab35dfb41 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 7 Jul 2023 16:12:51 +0000 Subject: [PATCH 001/112] Create initial GitHub Actions workflow --- .github/workflows/continuous-integration.yml | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/continuous-integration.yml diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml new file mode 100644 index 00000000..4888b07f --- /dev/null +++ b/.github/workflows/continuous-integration.yml @@ -0,0 +1,27 @@ +name: Python package + +on: [push, pull_request] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v3 + - name: Install poetry + run: pipx install poetry + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'poetry' + - name: Install dependencies + run: | + poetry install --with dev + - name: Run unit tests + run: | + poetry run python -m unittest -v tests + From 8e55790f473d4ddb4ef68345627d444158f011dd Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 29 Jun 2023 12:57:27 +0000 Subject: [PATCH 002/112] Add OpenSearch dependency --- pyproject.toml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8b5825e5..39d45f45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,14 +10,15 @@ repository = "https://github.com/libatoms/abcd" documentation = "https://libatoms.github.io/abcd/" [tool.poetry.dependencies] -python = "^3.9" +ase = "^3.23" +lark = "^1.1.9" +matplotlib = "^3.9" notebook = "^7.2" numpy = "^1.26" -tqdm = "^4.66" +opensearch-py = "^2.2.0" pymongo = "^4.7.3" -matplotlib = "^3.9" -ase = "^3.23" -lark = "^1.1.9" +python = "^3.9" +tqdm = "^4.66" [tool.poetry.group.dev.dependencies] mongomock = "^4.1.2" @@ -32,11 +33,11 @@ pre-commit = "^4.0.1" ruff = 
"^0.9.6" [tool.poetry.extras] -tests = ["mongomock", "pytest", "pytest-cov"] -mongo = ["pymongo"] http = ["requests"] +mongo = ["pymongo"] server-api = ["flask"] server-app = ["flask", "Flask-Nav", "Flask-MongoEngine", "gunicorn", "flask-paginate"] +tests = ["mongomock", "pytest", "pytest-cov"] [build-system] requires = ["poetry-core"] From 4db84074b0a9235e105ed4d481eb3231ad3b46ff Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 31 Jul 2023 16:07:02 +0000 Subject: [PATCH 003/112] Create initial OpenSearch interface --- abcd/__init__.py | 15 ++++ abcd/backends/atoms_opensearch.py | 120 ++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 abcd/backends/atoms_opensearch.py diff --git a/abcd/__init__.py b/abcd/__init__.py index e646ecec..dd3bddda 100644 --- a/abcd/__init__.py +++ b/abcd/__init__.py @@ -8,6 +8,7 @@ class ConnectionType(Enum): mongodb = 1 http = 2 + opensearch = 3 class ABCD: @@ -44,6 +45,20 @@ def from_url(cls, url, **kwargs): from abcd.backends.atoms_pymongo import MongoDatabase return MongoDatabase(db_name=db, host=r.geturl(), uri_mode=True, **kwargs) + if r.scheme == "opensearch": + conn_settings = { + "host": r.hostname, + "port": r.port, + "username": r.username, + "password": r.password, + } + + db = r.path.split("/")[1] if r.path else None + db = db if db else "abcd" + + from abcd.backends.atoms_opensearch import OpenSearchDatabase + + return OpenSearchDatabase(db_name=db, **conn_settings, **kwargs) if r.scheme == "http" or r.scheme == "https": raise NotImplementedError("http not yet supported! 
soon...") if r.scheme == "ssh": diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py new file mode 100644 index 00000000..261b52c2 --- /dev/null +++ b/abcd/backends/atoms_opensearch.py @@ -0,0 +1,120 @@ +import logging + +from os import linesep + +from ase import Atoms + +import abcd.errors +from abcd.model import AbstractModel +from abcd.database import AbstractABCD + +from opensearchpy import OpenSearch, AuthenticationException, ConnectionTimeout + +logger = logging.getLogger(__name__) + + +class AtomsModel(AbstractModel): + def __init__(self, client=None, index_name=None, dict=None): + super().__init__(dict) + + self._client = client + self._index_name = index_name + + @classmethod + def from_atoms(cls, client, index_name, atoms: Atoms, extra_info=None, store_calc=True): + obj = super().from_atoms(atoms, extra_info, store_calc) + obj._client = client + obj._index_name = index_name + return obj + + @property + def _id(self): + return self.get("_id", None) + + +class OpenSearchDatabase(AbstractABCD): + """Wrapper to make database operations easy""" + + def __init__( + self, + host="localhost", + port=9200, + index_name="atoms", + username="admin", + password="admin", + **kwargs): + + super().__init__() + + logger.info((host, port, index_name, username, password, kwargs)) + + self.client = OpenSearch( + hosts=[{"host": host, "port": port}], + http_auth=(username, password), + verify_certs=False, + ca_certs=False, + use_ssl=True, + ssl_assert_hostname=False, + ssl_show_warn=False, + ) + + try: + info = self.client.info() + logger.info("DB info: {}".format(info)) + + except AuthenticationException: + raise abcd.errors.AuthenticationError() + + except ConnectionTimeout: + raise abcd.errors.TimeoutError() + + self.index_name = index_name + self.create() + + def info(self): + host = self.client.transport.hosts[0]["host"] + port = self.client.transport.hosts[0]["port"] + + self.client.indices.refresh(index=self.index_name) + return { + 
"host": host, + "port": port, + "index": self.index_name, + "number of confs": self.client.count(index=self.index_name)["count"], + "type": "opensearch" + } + + def create(self): + self.client.indices.create(index=self.index_name, ignore=400) + + def __repr__(self): + host = self.client.transport.hosts[0]["host"] + port = self.client.transport.hosts[0]["port"] + + return "{}(".format(self.__class__.__name__) + \ + "url={}:{}, ".format(host, port) + \ + "index={}) ".format(self.index_name) + + def _repr_html_(self): + """Jupyter notebook representation""" + return "ABCD OpenSearch database" + + def print_info(self): + """shows basic information about the connected database""" + + out = linesep.join(["{:=^50}".format(" ABCD OpenSearch "), + "{:>10}: {}".format("type", "opensearch"), + linesep.join("{:>10}: {}".format(k, v) for k, v in self.info().items())]) + + print(out) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + +if __name__ == "__main__": + db = OpenSearchDatabase(username="admin", password="admin") + print(db.info()) From 0503ab9d9f4a7a6d35f5cfde878e5e374e85731d Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 31 Jul 2023 16:15:21 +0000 Subject: [PATCH 004/112] Add OpenSearch insertion and deletion functions --- abcd/backends/atoms_opensearch.py | 79 ++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 261b52c2..1028b977 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -1,17 +1,33 @@ +import types import logging +from typing import Union, Iterable from os import linesep +from datetime import datetime from ase import Atoms +from ase.io import iread import abcd.errors from abcd.model import AbstractModel from abcd.database import AbstractABCD +from abcd.parsers import extras -from opensearchpy import OpenSearch, AuthenticationException, 
ConnectionTimeout +from pathlib import Path + +from opensearchpy import OpenSearch, helpers, AuthenticationException, ConnectionTimeout logger = logging.getLogger(__name__) +map_types = { + bool: "bool", + float: "float", + int: "int", + str: "str", + datetime: "date", + dict: "dict" +} + class AtomsModel(AbstractModel): def __init__(self, client=None, index_name=None, dict=None): @@ -31,6 +47,13 @@ def from_atoms(cls, client, index_name, atoms: Atoms, extra_info=None, store_cal def _id(self): return self.get("_id", None) + def save(self): + if not self._id: + body = {} + body.update(self.data) + body["derived"] = self.derived + self._client.index(index=self._index_name, body=body) + class OpenSearchDatabase(AbstractABCD): """Wrapper to make database operations easy""" @@ -84,9 +107,63 @@ def info(self): "type": "opensearch" } + def delete(self, query=None): + # query = parser(query) + if not query: + query = { + "match_all": {} + } + + self.client.delete_by_query( + index=self.index_name, + body={ + "query": query, + }, + ) + + def destroy(self): + self.client.indices.delete(index=self.index_name, ignore=404) + def create(self): self.client.indices.create(index=self.index_name, ignore=400) + def save_bulk(self, actions: Iterable): + helpers.bulk(client=self.client, actions=actions, index=self.index_name) + + def push(self, atoms: Union[Atoms, Iterable], extra_info=None, store_calc=True): + + if extra_info and isinstance(extra_info, str): + extra_info = extras.parser.parse(extra_info) + + # Could combine into single data.save, but keep separate for option of bulk insertion? 
+ if isinstance(atoms, Atoms): + data = AtomsModel.from_atoms(self.client, self.index_name, atoms, extra_info=extra_info, store_calc=store_calc) + data.save() + + elif isinstance(atoms, types.GeneratorType) or isinstance(atoms, list): + + actions = [] + for item in atoms: + data = AtomsModel.from_atoms(self.client, self.index_name, item, extra_info=extra_info, store_calc=store_calc) + actions.append(data.data) + actions[-1]["derived"] = data.derived + self.save_bulk(actions) + + def upload(self, file: Path, extra_infos=None, store_calc=True): + + if isinstance(file, str): + file = Path(file) + + extra_info = {} + if extra_infos: + for info in extra_infos: + extra_info.update(extras.parser.parse(info)) + + extra_info["filename"] = str(file) + + data = iread(str(file)) + self.push(data, extra_info, store_calc=store_calc) + def __repr__(self): host = self.client.transport.hosts[0]["host"] port = self.client.transport.hosts[0]["port"] From 25d76402b628d7915d54ac8a73683eb6ac6dd3bc Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 31 Jul 2023 16:19:35 +0000 Subject: [PATCH 005/112] Add OpenSearch property functions --- abcd/backends/atoms_opensearch.py | 227 ++++++++++++++++++++++++++++++ 1 file changed, 227 insertions(+) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 1028b977..335eb23e 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -164,6 +164,233 @@ def upload(self, file: Path, extra_infos=None, store_calc=True): data = iread(str(file)) self.push(data, extra_info, store_calc=store_calc) + def get_atoms(self, query=None): + # query = parser(query) + if not query: + query = { + "query": { + "match_all": {} + } + } + + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + ): + yield AtomsModel(None, None, hit["_source"]).to_ase() + + def count(self, query=None): + # query = parser(query) + logger.info("query; {}".format(query)) + + if not query: + query = 
{ + "match_all": {} + } + + return self.client.count(index=self.index_name, body={"query": query})["count"] + + # Slow - use count_property where possible! + def property(self, name, query=None): + # query = parser(query) + if not query: + query = { + "match_all": {} + } + + body = { + "query": query, + } + + return [hit["_source"][format(name)] for hit in helpers.scan( + self.client, + index=self.index_name, + query=body, + stored_fields=format(name), + _source=format(name), + )] + + def count_property(self, name, query=None): + # query = parser(query) + if not query: + query = { + "match_all": {} + } + + body = { + "size" : 0, + "query": query, + "aggs": { + format(name): { + "terms": { + "field": format(name), + "size": 10000, # Use composite for all results? + }, + }, + } + } + + prop = {} + + for val in self.client.search( + index=self.index_name, + body=body, + )["aggregations"][format(name)]["buckets"]: + prop[val["key"]] = val["doc_count"] + + return prop + + def properties(self, query=None): + # query = parser(query) + if not query: + query = { + "match_all": {} + } + + properties = {} + + for prop in self.client.indices.get_mapping(self.index_name)[self.index_name]["mappings"]["properties"].keys(): + + body = { + "size" : 0, + "query": query, + "aggs": { + "info_keys": { + "filter": { + "term": { + "derived.info_keys.keyword": prop + } + }, + }, + "derived_keys": { + "filter": { + "term": { + "derived.derived_keys.keyword": prop + } + }, + }, + "arrays_keys": { + "filter": { + "term": { + "derived.arrays_keys.keyword": prop + } + }, + }, + } + } + + res = self.client.search( + index=self.index_name, + body=body, + ) + + derived = ["info_keys", "derived_keys", "arrays_keys"] + for label in derived: + count = res["aggregations"][label]["doc_count"] + if count > 0: + key = label.split("_")[0] + if key in properties: + properties[key].append(prop) + else: + properties[key] = [prop] + + return properties + + def get_type_of_property(self, prop, category): + # 
TODO: Probably it would be nicer to store the type info in the database from the beginning. + atoms = self.client.search( + index=self.index_name, + body = { + "size" : 1, + "query": { + "exists" : { + "field": prop + } + } + } + ) + + data = atoms["hits"]["hits"][0]["_source"][prop] + + if category == "arrays": + if type(data[0]) == list: + return "array({}, N x {})".format(map_types[type(data[0][0])], len(data[0])) + else: + return "vector({}, N)".format(map_types[type(data[0])]) + + if type(data) == list: + if type(data[0]) == list: + if type(data[0][0]) == list: + return "list(list(...)" + else: + return "array({})".format(map_types[type(data[0][0])]) + else: + return "vector({})".format(map_types[type(data[0])]) + else: + return "scalar({})".format(map_types[type(data)]) + + def count_properties(self, query=None): + # query = parser(query) + if not query: + query = { + "match_all": {} + } + + properties = {} + + try: + keys = self.client.indices.get_mapping(self.index_name)[self.index_name]["mappings"]["properties"].keys() + except KeyError: + return properties + + for key in keys: + + body = { + "size" : 0, + "query": query, + "aggs": { + "info_keys": { + "filter": { + "term": { + "derived.info_keys.keyword": key + } + }, + }, + "derived_keys": { + "filter": { + "term": { + "derived.derived_keys.keyword": key + } + }, + }, + "arrays_keys": { + "filter": { + "term": { + "derived.arrays_keys.keyword": key + } + }, + }, + } + } + + res = self.client.search( + index=self.index_name, + body=body, + ) + + derived = ["info_keys", "derived_keys", "arrays_keys"] + for label in derived: + count = res["aggregations"][label]["doc_count"] + if count > 0: + properties[key] = { + "count": count, + "category": label.split("_")[0], + "dtype": self.get_type_of_property(key, label.split("_")[0]) + } + + return properties + def __repr__(self): host = self.client.transport.hosts[0]["host"] port = self.client.transport.hosts[0]["port"] From 29cfa96931adaccdf826496c2d2b3bba6e0eac61 
Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 31 Jul 2023 16:00:26 +0000 Subject: [PATCH 006/112] Add openmock dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 39d45f45..774722fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ tqdm = "^4.66" [tool.poetry.group.dev.dependencies] mongomock = "^4.1.2" +openmock = "^2.2" pytest = "^8.2.2" pytest-cov = "^5.0.0" pytest-mock = "^3.14.0" From 9f78aa5005db15c148b54fe34e4a1f665f192086 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 31 Jul 2023 19:22:55 +0000 Subject: [PATCH 007/112] Add OpenSearch example notebooks --- tutorials/abcd_opensearch_uploading.ipynb | 552 +++++++++ tutorials/abcd_opensearch_usage.ipynb | 1354 +++++++++++++++++++++ 2 files changed, 1906 insertions(+) create mode 100644 tutorials/abcd_opensearch_uploading.ipynb create mode 100644 tutorials/abcd_opensearch_usage.ipynb diff --git a/tutorials/abcd_opensearch_uploading.ipynb b/tutorials/abcd_opensearch_uploading.ipynb new file mode 100644 index 00000000..661761b0 --- /dev/null +++ b/tutorials/abcd_opensearch_uploading.ipynb @@ -0,0 +1,552 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "# Basic usage of ABCD database" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "is_executing": false + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from pathlib import Path\n", + "from ase.io import iread, read\n", + "\n", + "from abcd import ABCD" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + 
"First of all, we need to define the url of the database. It could be local or remote:\n", + "\n", + "- direct access: url = 'opensearch://admin:admin@localhost:9200'\n", + "- api access: url = 'http://localhost/api'\n", + "\n", + "using with statement to catch the riased exceptions. You may can ignore them but in that case need to handle all the unexpected events. (cannot connect to db, lost connection, wrong filter, wrong url, etc. )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenSearchDatabase(url=localhost:9200, index=atoms) \n" + ] + } + ], + "source": [ + "url = 'opensearch://admin:admin@localhost:9200'\n", + "abcd = ABCD.from_url(url)\n", + "\n", + "print(abcd)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " index: atoms\n", + "number of confs: 82204\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Cleanup \n", + "\n", + "WARNING!! 
Remove all elements from the database.\n", + "Only supported in the case of local access" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.delete()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.create()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " index: atoms\n", + "number of confs: 0\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Uploading configurations" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "pycharm": { + "is_executing": false, + "metadata": false, + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/ubuntu/abcd/tutorials\n" + ] + } + ], + "source": [ + "!pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "is_executing": false, + "metadata": false, + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "directory = Path('/home/ubuntu/data/')\n", + "file = directory / 'input.data.2055.xyz'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "Uploading configurations one-by-one directly from an ase atoms object:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "pycharm": { + "is_executing": false + }, + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 15.3 s, sys: 162 ms, total: 15.5 s\n", + "Wall time: 23.7 s\n" + ] + } + ], +
"source": [ + "%%time\n", + "with abcd as db:\n", + " for atoms in iread(file.as_posix(), index=slice(None)):\n", + " \n", + " # Hack to fix the representation of forces\n", + " \n", + "# atoms.calc.results['forces'] = atoms.arrays['force']\n", + "# del(atoms.info['energy'])\n", + " \n", + " db.push(atoms, store_calc=False)\n", + " # break" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 7min 27s, sys: 6.73 s, total: 7min 33s\n", + "Wall time: 9min 28s\n" + ] + } + ], + "source": [ + "%%time\n", + "with abcd as db:\n", + "\n", + " atoms_list = []\n", + " for i in range(32):\n", + " for atoms in iread(file.as_posix(), index=slice(None)):\n", + " atoms_list.append(atoms)\n", + " db.push(atoms_list, store_calc=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " index: atoms\n", + "number of confs: 2055\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'energy': -55970.4941491558}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "atoms.info" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "Reading the trajectory from file:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.7 s, sys: 60.1 ms, total: 2.76 s\n", + "Wall time: 2.76 s\n" + ] + }, + { + 
"data": { + "text/plain": [ + "2055" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "traj = read(file.as_posix(), index=slice(None))\n", + "len(traj)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "Pushing the whole trajectory to the database:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Atoms(symbols='C48H28O32Zr6', pbc=True, cell=[[14.759483662029265, 0.0, 0.0], [7.380258413807584, 12.781786651387147, 0.0], [7.380243655055182, 4.260782501715179, 12.050631347394049]], forces=..., calculator=SinglePointCalculator(...))" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traj[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 12.5 s, sys: 116 ms, total: 12.6 s\n", + "Wall time: 16.1 s\n" + ] + } + ], + "source": [ + "%%time\n", + "db.push(traj, store_calc=False)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "Uploading a whole file and injecting to the database on the server side:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 15.2 s, sys: 172 ms, total: 15.4 s\n", + "Wall time: 19 s\n" + ] + } + ], + "source": [ + "%%time\n", + "abcd.upload(file.as_posix(), store_calc=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "data": { + 
"text/plain": [ + "{'host': 'localhost',\n", + " 'port': 9200,\n", + " 'index': 'atoms',\n", + " 'number of confs': 8220,\n", + " 'type': 'opensearch'}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "pycharm": {} + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'cell': {'count': 8220, 'category': 'info', 'dtype': 'array(float)'},\n", + " 'elements': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(dict)'},\n", + " 'energy': {'count': 8220, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'filename': {'count': 2055, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'forces': {'count': 8220,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'formula': {'count': 8220, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'hash': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'hash_structure': {'count': 8220,\n", + " 'category': 'derived',\n", + " 'dtype': 'scalar(str)'},\n", + " 'modified': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'n_atoms': {'count': 8220, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'numbers': {'count': 8220, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", + " 'pbc': {'count': 8220, 'category': 'info', 'dtype': 'vector(bool)'},\n", + " 'positions': {'count': 8220,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'uploaded': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'username': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'volume': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(float)'}}" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.count_properties()" + ] + } + ], + "metadata": { + "@webio": { + "lastCommId": 
null, + "lastKernelId": null + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/abcd_opensearch_usage.ipynb b/tutorials/abcd_opensearch_usage.ipynb new file mode 100644 index 00000000..0e928ed8 --- /dev/null +++ b/tutorials/abcd_opensearch_usage.ipynb @@ -0,0 +1,1354 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Basic usage of ABCD database" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from collections import Counter\n", + "\n", + "from abcd import ABCD" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First of all, we need to define the url of the database. 
It could be local or remote:\n", + "\n", + "- direct access: url = 'opensearch://admin:admin@localhost:9200'\n", + "- api access: url = 'http://localhost/api'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenSearchDatabase(url=localhost:9200, index=atoms) \n" + ] + } + ], + "source": [ + "url = 'opensearch://admin:admin@localhost:9200'\n", + "abcd = ABCD.from_url(url)\n", + "\n", + "print(abcd)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Basic information about the database:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " index: atoms\n", + "number of confs: 8220\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data stored in the database:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'_index': 'atoms',\n", + " '_id': 'PUBJrYkBtksDlC5rJEIo',\n", + " '_score': 1.0,\n", + " '_source': {'n_atoms': 114,\n", + " 'cell': [[14.759483662029265, 0.0, 0.0],\n", + " [7.380258413807584, 12.781786651387147, 0.0],\n", + " [7.380243655055182, 4.260782501715179, 12.050631347394049]],\n", + " 'pbc': [True, True, True],\n", + " 'formula': 'C48H28O32Zr6',\n", + " 'numbers': [1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 
1,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 40,\n", + " 40,\n", + " 40,\n", + " 40,\n", + " 40,\n", + " 40],\n", + " 'positions': [[17.166810638040264, 11.566799628342661, 2.3959431306453296],\n", + " [10.391931260040497, 9.232075241735581, 8.799170748954813],\n", + " [15.152442318761134, 3.2144705981189303, 0.6236271192356346],\n", + " [15.428455018627362, 13.198368239182761, 6.757442369774353],\n", + " [20.968952462595865, 8.354501228588285, 5.937790321351722],\n", + " [12.821718368988067, 11.860905590260213, 0.764468940894911],\n", + " [20.164574198879585, 13.449131931085539, 8.500504258460039],\n", + " [5.203325638335655, 4.037525599970674, 3.6535544413570706],\n", + " [6.476452578322519, 9.882112891744764, 0.7336632917566172],\n", + " [14.332783438660714, 4.5739237510789845, 5.763830060388294],\n", + " [12.20845295758527, 7.975607890442319, 2.7181563401019804],\n", + " [26.126453831046035, 15.25865575215541, 8.681035572143871],\n", + " [7.431639790543854, 7.68880010777489, 3.739705967641281],\n", + " [21.52510600020679, 15.432405681052952, 8.675468268048236],\n", + " [11.49107468172553, 9.60164215963523, 0.7009214784567679],\n", 
+ " [18.70674083756121, 4.607625571215378, 5.677858158016438],\n", + " [17.34676875755316, 10.130528920703508, 4.483049170020872],\n", + " [2.9330861621787743, 3.3990818416373494, 0.720770788622487],\n", + " [22.67189915206641, 9.23882668038352, 8.661796350384211],\n", + " [15.54501705742674, 14.931708899088871, 4.905010140501105],\n", + " [20.180891240581246, 11.991517760259551, 0.36399634878062614],\n", + " [13.537900990107627, 8.71222318139275, 4.81955270950513],\n", + " [13.02082403030889, 3.7798366294145125, 3.8744821907763676],\n", + " [12.738608267554484, 13.15125952920471, 8.605595280531846],\n", + " [9.30512423974256, 4.003262597986021, 2.08391144947309],\n", + " [13.66172535110934, 6.786427797477926, 9.451058899918706],\n", + " [19.297496722626608, 6.66303528741421, 9.65403361924748],\n", + " [16.364750768476505, 11.479160632545504, 9.561987519221761],\n", + " [16.965060879285595, 12.039276627942046, 3.4048076978088133],\n", + " [9.872075532499599, 8.868306222192839, 9.697764141289875],\n", + " [23.093789800187132, 7.8655671087878325, 11.819108411864843],\n", + " [15.963054660441902, 12.957563889928995, 5.796919712452191],\n", + " [20.789096532494103, 7.4060484208004835, 5.418655862348822],\n", + " [20.271838371834924, 15.445983071791856, 11.893743962525676],\n", + " [20.235785459686173, 13.946169611391733, 9.533785527794883],\n", + " [5.468638782470736, 4.963675408702813, 4.207234520017469],\n", + " [13.30165782031905, 14.29182491089219, 11.848895929341783],\n", + " [13.860085269446175, 5.316906102226919, 5.113333629612867],\n", + " [12.690982408563865, 7.210540402496312, 3.452020777408105],\n", + " [26.695527891724396, 14.913992347710204, 9.505586002333807],\n", + " [6.71805210100351, 7.061748291218562, 4.294338782243351],\n", + " [20.991757417986378, 15.139143269943215, 9.631524849633491],\n", + " [19.51051107137078, 14.241804768445284, 11.863723702327984],\n", + " [19.467115888926717, 5.338714780974876, 5.342773829476735],\n", + " [17.04442989892783, 
11.179573262577135, 4.5377135674632525],\n", + " [9.75214492091448, 7.909665520118783, 11.856922309351365],\n", + " [23.22825354947115, 8.805217731024735, 9.58442235219589],\n", + " [16.00517946832365, 13.88422938185283, 4.718010810191537],\n", + " [27.480789782170447, 15.480087960917762, 11.617172959535212],\n", + " [13.389347998136012, 7.596486021826197, 4.594226402069221],\n", + " [13.123855701056296, 4.872440299211145, 4.040302647886903],\n", + " [12.64753966896389, 13.717491998222464, 9.492966517597923],\n", + " [20.97530038245366, 8.335251317448973, 10.600423519640026],\n", + " [16.502636637988203, 10.85079294806207, 6.900496977690235],\n", + " [9.019607067373974, 1.3080109208445687, 2.1730394493971033],\n", + " [12.07299373071628, 8.404520741314311, 10.81235772773731],\n", + " [18.747600350981866, 12.258841111031488, 10.56065092033455],\n", + " [7.015172452108002, 5.4262894076561405, 2.347590240376888],\n", + " [18.78251160034123, 7.0857013283180255, 6.946566060828641],\n", + " [14.224469007695566, 4.466485050187827, 10.705760127563483],\n", + " [11.603451262170989, 5.349451807744306, 2.0915175410667244],\n", + " [18.543838401175798, 4.375079148751448, 10.585231910862703],\n", + " [14.160667831243819, 12.098710870007285, 10.795787760496086],\n", + " [14.45018544044642, 7.011509947840652, 6.832073401261807],\n", + " [16.477007379156557, 13.350335720646678, 3.479800617715256],\n", + " [10.582432902092235, 8.377782469927169, 10.803729958995012],\n", + " [22.44315777833504, 8.370354822180708, 10.648120637633818],\n", + " [16.567659589367246, 11.689973911016514, 5.702162918040675],\n", + " [19.752709342190204, 6.556676208576273, 5.912505297587319],\n", + " [20.937902942140084, 15.937436540790538, 10.817725992408736],\n", + " [19.488886710337034, 13.540165709815675, 10.668618978356523],\n", + " [6.371529009087222, 5.8630806488390075, 3.652784530849877],\n", + " [13.287113670442396, 13.309066380643525, 10.810269250499239],\n", + " [13.887372022428522, 
6.66488149144997, 5.501186858702026],\n", + " [12.488596287754628, 5.853559681882998, 3.200679859004045],\n", + " [26.74856971012843, 15.90075025114669, 10.529236108850316],\n", + " [10.08315746894864, 1.501050200458931, 1.573941101482483],\n", + " [12.62721678223712, 8.955703669760464, 9.832552292079287],\n", + " [20.39919235012287, 7.4641321602901884, 11.35141521129136],\n", + " [15.34144808796582, 10.482235049392207, 7.284668181164747],\n", + " [19.016384691239136, 8.259448068479994, 7.407999661220768],\n", + " [13.965549620748263, 5.310837048175544, 11.629395661906845],\n", + " [18.964757008970206, 11.57454425853002, 9.498296919639932],\n", + " [6.724169659438005, 4.266531959190234, 1.8722045706136907],\n", + " [14.989309002358754, 11.848517668179902, 11.724244051601735],\n", + " [15.200359309885917, 6.086405481062033, 7.400205812261053],\n", + " [10.683892382055284, 6.184519761996729, 1.7395703404079805],\n", + " [17.662435367568307, 4.527759521878119, 9.64210249881064],\n", + " [7.857986041984485, 6.202923889200398, 1.7950802519056281],\n", + " [14.981497208999828, 4.615121609824532, 9.745747168438765],\n", + " [17.887354642477394, 12.0129465321045, 11.494403179921116],\n", + " [17.806453279094754, 6.309886859779258, 7.194991281595532],\n", + " [17.65884499015476, 10.488278242553303, 7.416379002429795],\n", + " [12.548166448986402, 7.693791677648976, 11.776895469518676],\n", + " [20.34393939120336, 9.206817729894354, 9.922408032378584],\n", + " [7.857463468905509, 1.74909233925955, 1.8274196418121598],\n", + " [18.857763430781485, 5.182306608404846, 11.489619301518633],\n", + " [14.163844810128705, 8.142657329873755, 7.360230728587728],\n", + " [11.792894920225645, 4.155462409412462, 1.7001921686281716],\n", + " [13.835964747759567, 11.264146337910587, 9.884038668157677],\n", + " [18.442882359243793, 7.121400257955315, 9.800715117804442],\n", + " [16.392129968717537, 10.582139510371382, 9.857218178507331],\n", + " [9.302433061412287, 3.9652007015997683, 
1.0673996577713942],\n", + " [14.478398449342821, 7.162072897735833, 9.816748197723141],\n", + " [15.11113884153613, 9.160319431120458, 11.201390619831347],\n", + " [16.44457564782678, 6.551160139418165, 11.181147839950713],\n", + " [16.50639593935091, 8.350138210113135, 8.738223430491407],\n", + " [18.033642210935266, 9.122422759734661, 11.23846663261546],\n", + " [10.893906920444666, 2.90658841770915, -0.019051977695474183],\n", + " [14.757549238860731, 9.308555112561303, 9.214183379238824],\n", + " [18.225880348003468, 9.344784918068, 9.180851999025142],\n", + " [7.388359137923477, 3.030521667505481, 0.14692076074094484],\n", + " [9.220983449272417, 6.075785767588413, 0.00657883162539968],\n", + " [16.440728831136994, 6.2934328881825925, 9.187973671216682]],\n", + " 'forces': [[0.03505596579759337, 0.7596797943958487, 0.9211044616269563],\n", + " [0.16925367694563342, -0.01943702713953078, 0.38893903196958485],\n", + " [0.01574595116377608, 0.17132290092535438, -0.1999552221020049],\n", + " [0.6313750521363777, 0.05251478601615336, -0.8064430222079316],\n", + " [-0.09833287623511343, -0.138000887230052, -0.08874934559146055],\n", + " [0.19781246456634455, -0.9287673780647797, -1.0439826331463689],\n", + " [0.07987955323902354, 0.3227860853196942, 1.7840037712935266],\n", + " [0.3716884711227413, 0.4696845328184121, 0.370453313071228],\n", + " [-0.14715298673081575, -0.28619517081945, -0.2515490388965677],\n", + " [-0.019879256916508915, 0.1586797572898179, 0.03203954734206577],\n", + " [0.9120773177492224, -1.200046035662623, 1.3240873743396222],\n", + " [-0.5694124897336902, -0.12506360937075797, -1.3829512429794373],\n", + " [-0.0348132536409263, 0.07054439117941769, 0.5526864339711696],\n", + " [-0.41634750794948194, 0.12174380071939654, 1.3701661744387312],\n", + " [1.1617998448227365, 0.7015918847484289, -1.3710321220485349],\n", + " [0.19070902021846559, 0.8070636865577138, 0.18004202662121627],\n", + " [0.22735084256512936, 0.16909786808234928, 
-0.1733360748519467],\n", + " [-0.4446707825029187, -0.3087082660123333, -0.10186968600959667],\n", + " [1.0559542045857038, -0.7522446777152361, 1.3649098107407422],\n", + " [0.7762546692811232, -1.6189836731314526, -0.7746117342375823],\n", + " [0.028996903632322036, -0.4549212573567351, -0.15349589870642655],\n", + " [-0.30669869163043734, -1.474741689618629, -0.22373535702799768],\n", + " [-0.0288004713360364, 0.7124475973319003, -0.205686211479239],\n", + " [-0.009475030082091964, -0.9498180296696098, -0.7324728929189461],\n", + " [0.024624999488289372, -0.06313755663570486, -1.801891965772292],\n", + " [-0.06351139506345264, -0.159376012078223, -0.050473844173332186],\n", + " [-0.27379062557521333, 0.16125086064434194, -0.24263708044911655],\n", + " [0.16200265126520474, 1.5060489867253262, -0.5510645819751359],\n", + " [0.11499568285511277, -1.536591123473717, -0.46194088373525655],\n", + " [-0.3656006125998993, -0.17292778363924827, -1.3654302020596978],\n", + " [1.3289642431824835, 0.5107142001499065, -0.8944297785444403],\n", + " [-0.025697149587125892, 1.0899791579381328, 0.0022805686754628165],\n", + " [-0.7954947499036974, 0.11066234526139797, 1.0548332035231311],\n", + " [-1.8548078439117515, 0.38714131650410166, 2.989239836187592],\n", + " [0.4506861359111631, 1.1364487657471294, -0.6025277867192889],\n", + " [-1.2287668027946692, -0.09796263735206197, 0.2020090194615987],\n", + " [1.0444392611021323, -2.684139342402327, -1.0437898003947796],\n", + " [-0.07301933526848252, -0.6708481734970433, 0.9851917838418224],\n", + " [-0.8675355232354481, 0.9083975546282277, -1.1112478391074265],\n", + " [0.1290364782741685, 1.0164497157641128, 0.008965437397225301],\n", + " [0.2684093062541945, -0.321198686108611, -1.4872871313266933],\n", + " [-0.643392389215423, -0.898736890883939, -1.098486424717582],\n", + " [-0.03591728542135892, 0.6514399427150831, 0.9283400606872579],\n", + " [0.8447910287406576, -1.5317872739661245, -1.3569157361908561],\n", + " 
[0.17489879147082638, 0.8299521628403577, -1.920810638125738],\n", + " [0.3438521635446514, 0.034021868028403804, 0.7196574853586566],\n", + " [-0.49089716393457716, 1.32019318119886, -0.94368029174169],\n", + " [-0.8017672136473943, -0.4383576953262203, 0.7052387377464858],\n", + " [-0.5808395014825368, 3.6184937280564875, 2.299079591174048],\n", + " [-0.046264433761305436, 2.3374147421900013, 0.42705255387637825],\n", + " [-0.40195909935689456, 0.5967109224900137, -0.9082941962733758],\n", + " [0.545840099958743, 0.6951430433146117, 4.287385291900738],\n", + " [-0.6422225371891145, 0.3670378593750782, 0.6300133957998216],\n", + " [1.0342078124131395, -1.450620626387826, 0.9064692271123347],\n", + " [-1.6097744951362183, 0.26370058757071924, 1.6795418988818989],\n", + " [-1.71670514098886, -1.4817345765222634, 0.997617412133566],\n", + " [-0.36574665127043626, 0.3369549216858052, -0.5080099136416182],\n", + " [0.25302742332541905, -0.05521598720041631, 0.07509935788229247],\n", + " [0.4694140527455162, -0.5513201496485756, -0.6341384740218201],\n", + " [-0.9458055657745397, -0.43950337898099623, 2.0890692980731704],\n", + " [-0.10420990428288447, 1.1152443621616988, 0.8470916520222858],\n", + " [0.303680730512897, -1.6201972339147879, -2.1682659662612322],\n", + " [-1.573935371256837, -0.7716241121396255, 0.4766830761494265],\n", + " [0.8572325978731995, -0.38723439044553537, 0.7948401469896356],\n", + " [-0.2134458014031925, 0.03674209537749023, 0.28632115488382165],\n", + " [-2.3274044083894974, 0.6730330971277177, -0.3481623612081755],\n", + " [0.898834592811411, -1.0161931196493315, 0.970241846276607],\n", + " [-1.0154469854558483, 0.8937227251219462, 1.1335331345430992],\n", + " [-0.24175827732254024, 1.3876321937466838, 0.534561183763117],\n", + " [0.6898959355886991, -1.1971756558776325, -2.6977317373963534],\n", + " [0.3469128049778768, -0.8706239325849023, -0.7535286867304852],\n", + " [0.5164133078455445, -1.1737287359463862, -0.8774512404325054],\n", + 
" [0.07070174270471455, 2.0437350895055317, -2.2527981881306025],\n", + " [0.3288256070994712, -0.3856912142121501, -0.4543299035851946],\n", + " [0.46220673582211436, -1.5513682828935087, 0.2903063650833339],\n", + " [-0.38469619721394926, -2.2885375531998284, -0.3758814264732991],\n", + " [1.9752635218505312, 0.3340079230208498, -1.0452476159967947],\n", + " [0.8562020996487061, 0.4789574741768384, -0.4695076409076208],\n", + " [-0.07918329845061843, -0.20133898992740973, 0.4222739011816598],\n", + " [0.4557039012178522, 0.3706605440016026, 0.8159874720805957],\n", + " [-0.05972518826358055, -0.10986736010417913, 0.6435188875004655],\n", + " [0.0024615743516213087, 0.5677505285253259, 0.5866964749241426],\n", + " [-0.5868764521589592, -0.8929930459899325, 1.0299773189536057],\n", + " [-0.19385399384171936, 0.13704546522351088, 0.0855992297614977],\n", + " [0.5114222820137425, 0.9816344052405028, 0.021354041800663897],\n", + " [-1.066992979728017, 1.3317744591489784, -0.48617301863097473],\n", + " [0.22686850357582064, -0.6558709822362698, -0.6488395287823175],\n", + " [0.7797446849745541, 0.08798264257114148, 0.9232755012995165],\n", + " [0.23828317402856555, 0.1996127511351823, 0.16316684686413324],\n", + " [1.8341906803324939, 0.7465810512458905, -1.8839816394548647],\n", + " [0.25763432631605526, -0.3878936213456354, -0.15371958469827013],\n", + " [-0.09513905164812367, 0.4332483987401102, 1.2322542473847458],\n", + " [-1.7633197590270517, 0.7325978485419761, -0.6597173528546378],\n", + " [1.5710907025053915, 0.6496761658138797, -0.2693559865093303],\n", + " [0.13329885341529846, -0.16092278785630468, -0.8911017423623445],\n", + " [0.4560890525003599, -0.12238914766138208, 0.02436686071149516],\n", + " [0.04387896406897804, 0.4783136698968656, 0.7284398601970398],\n", + " [-0.19204033753543812, -0.5498325092476479, -0.4268478940493581],\n", + " [-0.04005727604281352, -0.4718303756780979, -0.6626314413966554],\n", + " [0.5963838781433183, -0.31271095971465634, 
-1.0048920919648563],\n", + " [0.2392545368759047, -0.07763909377589129, -0.3403374652590171],\n", + " [1.0458302280169298, -0.7758582051438556, -0.16412124042933254],\n", + " [-1.0646861857983383, 0.17288613176490497, 1.4279167553260372],\n", + " [0.310471014472195, 0.4872791072940909, 0.18266249516014715],\n", + " [-7.301933526848252e-05, -1.2856617204855898, 1.537077576228393],\n", + " [0.23109436904931635, 0.627365159344662, 1.1682219467816664],\n", + " [0.06169413921247506, -0.7133752514222126, 0.5373045510423942],\n", + " [-0.3402017110018982, -0.024654310066530946, -0.18435993759480393],\n", + " [-0.14411137146241382, 1.2003674235819386, 0.056440860838511554],\n", + " [-1.1119034704628301, 0.02676570014126608, -0.4762907257775261],\n", + " [0.753222725431297, 0.3270011521591009, -0.33560457820400924],\n", + " [-0.7786571082555904, 0.413619053069661, -1.222248027349609],\n", + " [-0.5719985054876705, -0.1103018765710937, 0.8759049788750947],\n", + " [-0.24311736255574165, -0.40464795924505575, -1.3254814265784451]],\n", + " 'energy': -30848.841105643754,\n", + " 'volume': 2273.382588904185,\n", + " 'elements': {'1': 28, '6': 48, '8': 32, '40': 6},\n", + " 'username': 'ubuntu',\n", + " 'uploaded': '2023-07-31T18:49:59.074776',\n", + " 'modified': '2023-07-31T18:49:59.074783',\n", + " 'hash_structure': '913be2ca3a0e3c584cc728f4c359c850',\n", + " 'hash': 'f5a05edfd391a1cbe16805ae58469965',\n", + " 'derived': {'arrays_keys': ['forces', 'numbers', 'positions'],\n", + " 'info_keys': ['energy', 'pbc', 'formula', 'cell', 'n_atoms', 'volume'],\n", + " 'results_keys': [],\n", + " 'derived_keys': ['elements',\n", + " 'username',\n", + " 'uploaded',\n", + " 'modified',\n", + " 'volume',\n", + " 'hash_structure',\n", + " 'hash']}}}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.client.search(\n", + " index=abcd.index_name,\n", + " body = {\n", + " 'size' : 1,\n", + " 'query': {\n", + " 'match_all' : 
{}\n", + " }\n", + " }\n", + ")[\"hits\"][\"hits\"][0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Discovery/Filtering" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Available properties" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'info': ['cell', 'energy', 'filename', 'formula', 'n_atoms', 'pbc', 'volume'],\n", + " 'derived': ['elements',\n", + " 'hash',\n", + " 'hash_structure',\n", + " 'modified',\n", + " 'uploaded',\n", + " 'username',\n", + " 'volume'],\n", + " 'arrays': ['forces', 'numbers', 'positions']}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.properties()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'cell': {'count': 8220, 'category': 'info', 'dtype': 'array(float)'},\n", + " 'elements': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(dict)'},\n", + " 'energy': {'count': 8220, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'filename': {'count': 2055, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'forces': {'count': 8220,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'formula': {'count': 8220, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'hash': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'hash_structure': {'count': 8220,\n", + " 'category': 'derived',\n", + " 'dtype': 'scalar(str)'},\n", + " 'modified': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'n_atoms': {'count': 8220, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'numbers': {'count': 8220, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", + " 'pbc': {'count': 8220, 'category': 'info', 'dtype': 'vector(bool)'},\n", + " 'positions': {'count': 8220,\n", + " 
'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'uploaded': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'username': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'volume': {'count': 8220, 'category': 'derived', 'dtype': 'scalar(float)'}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.count_properties()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Histogram" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({114: 1264,\n", + " 210: 1336,\n", + " 306: 1400,\n", + " 141: 128,\n", + " 180: 100,\n", + " 144: 96,\n", + " 138: 108,\n", + " 171: 56,\n", + " 207: 68,\n", + " 195: 104,\n", + " 150: 76,\n", + " 129: 72,\n", + " 204: 52,\n", + " 177: 144,\n", + " 168: 56,\n", + " 132: 44,\n", + " 192: 24,\n", + " 126: 224,\n", + " 147: 100,\n", + " 189: 40,\n", + " 135: 84,\n", + " 174: 88,\n", + " 165: 36,\n", + " 186: 52,\n", + " 201: 60,\n", + " 153: 48,\n", + " 198: 92,\n", + " 183: 60,\n", + " 162: 60,\n", + " 156: 24,\n", + " 159: 44,\n", + " 252: 148,\n", + " 279: 76,\n", + " 222: 316,\n", + " 273: 40,\n", + " 300: 80,\n", + " 240: 32,\n", + " 303: 64,\n", + " 291: 72,\n", + " 288: 40,\n", + " 246: 52,\n", + " 249: 108,\n", + " 243: 84,\n", + " 231: 84,\n", + " 234: 32,\n", + " 237: 136,\n", + " 270: 52,\n", + " 264: 32,\n", + " 267: 40,\n", + " 255: 68,\n", + " 258: 92,\n", + " 282: 60,\n", + " 276: 80,\n", + " 297: 32,\n", + " 261: 68,\n", + " 225: 76,\n", + " 228: 64,\n", + " 285: 20,\n", + " 294: 32})" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Counter(abcd.property('n_atoms'))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"{306: 1400,\n", + " 210: 1336,\n", + " 114: 1264,\n", + " 222: 316,\n", + " 126: 224,\n", + " 252: 148,\n", + " 177: 144,\n", + " 237: 136,\n", + " 141: 128,\n", + " 138: 108,\n", + " 249: 108,\n", + " 195: 104,\n", + " 147: 100,\n", + " 180: 100,\n", + " 144: 96,\n", + " 198: 92,\n", + " 258: 92,\n", + " 174: 88,\n", + " 135: 84,\n", + " 231: 84,\n", + " 243: 84,\n", + " 276: 80,\n", + " 300: 80,\n", + " 150: 76,\n", + " 225: 76,\n", + " 279: 76,\n", + " 129: 72,\n", + " 291: 72,\n", + " 207: 68,\n", + " 255: 68,\n", + " 261: 68,\n", + " 228: 64,\n", + " 303: 64,\n", + " 162: 60,\n", + " 183: 60,\n", + " 201: 60,\n", + " 282: 60,\n", + " 168: 56,\n", + " 171: 56,\n", + " 186: 52,\n", + " 204: 52,\n", + " 246: 52,\n", + " 270: 52,\n", + " 153: 48,\n", + " 132: 44,\n", + " 159: 44,\n", + " 189: 40,\n", + " 267: 40,\n", + " 273: 40,\n", + " 288: 40,\n", + " 165: 36,\n", + " 234: 32,\n", + " 240: 32,\n", + " 264: 32,\n", + " 294: 32,\n", + " 297: 32,\n", + " 156: 24,\n", + " 192: 24,\n", + " 285: 20}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.count_property('n_atoms')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1264" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = {\n", + " \"match\": {\n", + " \"n_atoms\": 114\n", + " }\n", + "}\n", + "# query = 'config_type=\"bcc_bulk_54_high\"'\n", + "abcd.count(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({-30848.841105643754: 4,\n", + " -30848.996527284624: 4,\n", + " -30847.962019232786: 4,\n", + " -30847.614609562614: 4,\n", + " -30848.08155476988: 4,\n", + " -30848.98705228001: 4,\n", + " -30849.452531882027: 4,\n", + " -30848.579077955852: 4,\n", + " -30849.0330240119: 
4,\n", + " -30849.15485618998: 4,\n", + " -30848.67208402426: 4,\n", + " -30849.59900560283: 4,\n", + " -30849.152956835238: 4,\n", + " -30849.506049059146: 4,\n", + " -30849.039338414026: 4,\n", + " -30848.75403111327: 4,\n", + " -30849.24287250653: 4,\n", + " -30849.635113751516: 4,\n", + " -30848.669992285017: 4,\n", + " -30848.53989165518: 4,\n", + " -30848.541667742345: 4,\n", + " -30848.982825263305: 4,\n", + " -30848.993274163426: 4,\n", + " -30848.955658231844: 4,\n", + " -30848.92919298202: 4,\n", + " -30848.90066347646: 4,\n", + " -30848.973394885365: 4,\n", + " -30849.3060538074: 4,\n", + " -30846.88227034688: 4,\n", + " -30848.082359138454: 4,\n", + " -30848.393470452345: 4,\n", + " -30848.440153217758: 4,\n", + " -30846.06428791957: 4,\n", + " -30847.61709396216: 4,\n", + " -30847.702455535888: 4,\n", + " -30848.600852779062: 4,\n", + " -30847.24975766611: 4,\n", + " -30847.531694292487: 4,\n", + " -30847.90776870881: 4,\n", + " -30847.771109590714: 4,\n", + " -30847.67403895758: 4,\n", + " -30846.47248211173: 4,\n", + " -30846.75463343594: 4,\n", + " -30847.57171516637: 4,\n", + " -30847.4309729798: 4,\n", + " -30847.941100207667: 4,\n", + " -30848.40081779869: 4,\n", + " -30847.559415619886: 4,\n", + " -30847.55251562873: 4,\n", + " -30846.550409262683: 4,\n", + " -30846.664451365174: 4,\n", + " -30848.02147393434: 4,\n", + " -30846.957723438623: 4,\n", + " -30847.869441471594: 4,\n", + " -30847.770324542227: 4,\n", + " -30845.99422839666: 4,\n", + " -30846.257759513635: 4,\n", + " -30847.237276347576: 4,\n", + " -30848.725756034168: 4,\n", + " -30849.10906976763: 4,\n", + " -30846.714004115463: 4,\n", + " -30846.093730911365: 4,\n", + " -30848.67609498256: 4,\n", + " -30849.13014471399: 4,\n", + " -30849.31819552784: 4,\n", + " -30847.641800267986: 4,\n", + " -30847.300712347016: 4,\n", + " -30848.977763945506: 4,\n", + " -30847.587409877484: 4,\n", + " -30847.195874495854: 4,\n", + " -30849.623906470068: 4,\n", + " -30848.009476978466: 4,\n", + " 
-30848.729018135124: 4,\n", + " -30849.453980616217: 4,\n", + " -30846.526017520486: 4,\n", + " -30849.52193669899: 4,\n", + " -30847.335766326607: 4,\n", + " -30849.25327215404: 4,\n", + " -30846.79657244042: 4,\n", + " -30849.481313637138: 4,\n", + " -30847.107177894653: 4,\n", + " -30849.470211663753: 4,\n", + " -30848.86540241822: 4,\n", + " -30849.12388228561: 4,\n", + " -30849.586785513595: 4,\n", + " -30849.457516191604: 4,\n", + " -30848.815769122226: 4,\n", + " -30847.04439251945: 4,\n", + " -30847.68463479918: 4,\n", + " -30848.99177318337: 4,\n", + " -30848.9883674063: 4,\n", + " -30849.318018109607: 4,\n", + " -30849.904649332933: 4,\n", + " -30847.502294294685: 4,\n", + " -30849.211046885775: 4,\n", + " -30847.649691025705: 4,\n", + " -30848.07960779521: 4,\n", + " -30846.773776646005: 4,\n", + " -30849.59512444284: 4,\n", + " -30846.96259019501: 4,\n", + " -30847.030064636252: 4,\n", + " -30848.947369371544: 4,\n", + " -30848.133416134162: 4,\n", + " -30847.670196709874: 4,\n", + " -30846.260968008162: 4,\n", + " -30847.369155513596: 4,\n", + " -30846.229256947347: 4,\n", + " -30846.716904849214: 4,\n", + " -30848.71766908235: 4,\n", + " -30846.37205903584: 4,\n", + " -30846.517928391757: 4,\n", + " -30845.753419331242: 4,\n", + " -30848.195815379797: 4,\n", + " -30848.44611904203: 4,\n", + " -30849.7590251516: 4,\n", + " -30846.83054939335: 4,\n", + " -30847.115746760108: 4,\n", + " -30848.25433346544: 4,\n", + " -30849.638691504548: 4,\n", + " -30849.45334359767: 4,\n", + " -30848.09680838443: 4,\n", + " -30848.651082548637: 4,\n", + " -30848.689395635934: 4,\n", + " -30847.900528303213: 4,\n", + " -30848.28875777356: 4,\n", + " -30848.404342489517: 4,\n", + " -30848.069465839526: 4,\n", + " -30849.636098259463: 4,\n", + " -30847.359277236246: 4,\n", + " -30847.5867701378: 4,\n", + " -30848.72019511532: 4,\n", + " -30847.96739946803: 4,\n", + " -30847.220609373635: 4,\n", + " -30849.718864411196: 4,\n", + " -30847.575443126254: 4,\n", + " 
-30849.163763020853: 4,\n", + " -30848.918347884122: 4,\n", + " -30848.10928453281: 4,\n", + " -30849.162151018347: 4,\n", + " -30849.42646092519: 4,\n", + " -30848.531750824823: 4,\n", + " -30848.941888726284: 4,\n", + " -30848.2045578539: 4,\n", + " -30848.815644494076: 4,\n", + " -30848.383001960032: 4,\n", + " -30848.54074119465: 4,\n", + " -30848.680663230043: 4,\n", + " -30849.466605338766: 4,\n", + " -30849.520597626684: 4,\n", + " -30849.217094072093: 4,\n", + " -30848.982508794885: 4,\n", + " -30849.108042537806: 4,\n", + " -30848.373380286044: 4,\n", + " -30849.226976431157: 4,\n", + " -30849.37818085142: 4,\n", + " -30849.362841248894: 4,\n", + " -30849.378912021366: 4,\n", + " -30848.85828419175: 4,\n", + " -30849.401400327115: 4,\n", + " -30849.34273176251: 4,\n", + " -30848.855644143074: 4,\n", + " -30848.96776729862: 4,\n", + " -30849.19741289292: 4,\n", + " -30849.407071724192: 4,\n", + " -30848.132128219266: 4,\n", + " -30848.89840928524: 4,\n", + " -30849.356462083666: 4,\n", + " -30849.05438331725: 4,\n", + " -30849.873599780913: 4,\n", + " -30849.526808353432: 4,\n", + " -30848.976195753326: 4,\n", + " -30848.629281602498: 4,\n", + " -30849.23741226981: 4,\n", + " -30848.758917733976: 4,\n", + " -30848.774262506668: 4,\n", + " -30849.013950462984: 4,\n", + " -30849.496291600346: 4,\n", + " -30849.382986926423: 4,\n", + " -30849.2400408897: 4,\n", + " -30848.835008660597: 4,\n", + " -30848.591004706344: 4,\n", + " -30848.67263968076: 4,\n", + " -30849.12055787058: 4,\n", + " -30849.28197962207: 4,\n", + " -30849.22011535228: 4,\n", + " -30849.522590044373: 4,\n", + " -30849.559969509013: 4,\n", + " -30849.151450412905: 4,\n", + " -30848.723989198872: 4,\n", + " -30848.69853185879: 4,\n", + " -30847.982365186115: 4,\n", + " -30847.73307351533: 4,\n", + " -30848.82108268957: 4,\n", + " -30849.265600544593: 4,\n", + " -30848.146135824445: 4,\n", + " -30848.864224165205: 4,\n", + " -30848.200395600295: 4,\n", + " -30848.869698551844: 4,\n", + " 
-30848.249295005204: 4,\n", + " -30848.874686671017: 4,\n", + " -30849.203368104754: 4,\n", + " -30848.150455631978: 4,\n", + " -30848.57532332881: 4,\n", + " -30847.940033521336: 4,\n", + " -30848.866119710357: 4,\n", + " -30848.638517691143: 4,\n", + " -30848.628566759387: 4,\n", + " -30847.812624097576: 4,\n", + " -30848.714004525: 4,\n", + " -30849.45968031313: 4,\n", + " -30848.554322397416: 4,\n", + " -30848.172777131935: 4,\n", + " -30848.159783695108: 4,\n", + " -30847.12917449066: 4,\n", + " -30848.46307445666: 4,\n", + " -30849.25260248183: 4,\n", + " -30849.37171950781: 4,\n", + " -30849.073392375183: 4,\n", + " -30848.425393761976: 4,\n", + " -30848.798025665852: 4,\n", + " -30848.214829063665: 4,\n", + " -30848.359202065472: 4,\n", + " -30849.12210021194: 4,\n", + " -30847.944163121276: 4,\n", + " -30849.271018331547: 4,\n", + " -30847.921526785583: 4,\n", + " -30849.19388738575: 4,\n", + " -30849.61212393992: 4,\n", + " -30849.2890284595: 4,\n", + " -30848.508314202267: 4,\n", + " -30848.301582771906: 4,\n", + " -30849.123384045128: 4,\n", + " -30849.53606076891: 4,\n", + " -30848.950493510773: 4,\n", + " -30848.67271641687: 4,\n", + " -30848.817386294893: 4,\n", + " -30848.931213699547: 4,\n", + " -30847.940955715207: 4,\n", + " -30849.75504875176: 4,\n", + " -30848.948813479798: 4,\n", + " -30848.52031687253: 4,\n", + " -30848.9347604316: 4,\n", + " -30849.05508564312: 4,\n", + " -30848.816537299652: 4,\n", + " -30849.463037109716: 4,\n", + " -30849.11768897415: 4,\n", + " -30848.64714614954: 4,\n", + " -30848.651726642143: 4,\n", + " -30848.933730752753: 4,\n", + " -30848.99966883914: 4,\n", + " -30847.57795664198: 4,\n", + " -30849.15318649933: 4,\n", + " -30849.66395537168: 4,\n", + " -30848.042595140054: 4,\n", + " -30848.797511098543: 4,\n", + " -30849.243529117273: 4,\n", + " -30848.82386967973: 4,\n", + " -30849.18189260679: 4,\n", + " -30849.295537423044: 4,\n", + " -30848.867197281244: 4,\n", + " -30849.469959414208: 4,\n", + " 
-30849.37153691941: 4,\n", + " -30849.681773659362: 4,\n", + " -30848.502940225644: 4,\n", + " -30848.670962915156: 4,\n", + " -30849.229173478463: 4,\n", + " -30849.153212622263: 4,\n", + " -30848.92831242157: 4,\n", + " -30848.285696764746: 4,\n", + " -30848.947121203702: 4,\n", + " -30846.81224075649: 4,\n", + " -30848.549609929585: 4,\n", + " -30849.316313044154: 4,\n", + " -30849.291198295425: 4,\n", + " -30848.18794938444: 4,\n", + " -30848.50753051435: 4,\n", + " -30849.550507565862: 4,\n", + " -30847.694866824553: 4,\n", + " -30847.81796923013: 4,\n", + " -30847.771420344743: 4,\n", + " -30848.976801206663: 4,\n", + " -30849.977784286697: 4,\n", + " -30849.253732842804: 4,\n", + " -30849.015574710615: 4,\n", + " -30848.515430796055: 4,\n", + " -30848.69947908714: 4,\n", + " -30848.81321696633: 4,\n", + " -30849.199748718303: 4,\n", + " -30848.474982159183: 4,\n", + " -30848.274914253034: 4,\n", + " -30848.120799030803: 4,\n", + " -30848.72381640657: 4,\n", + " -30849.281977989383: 4,\n", + " -30849.132286250067: 4,\n", + " -30849.309451965284: 4,\n", + " -30848.87781108236: 4,\n", + " -30849.056839689063: 4,\n", + " -30849.22055998633: 4,\n", + " -30847.92245278905: 4,\n", + " -30849.499543633094: 4,\n", + " -30849.06882140656: 4,\n", + " -30848.62276719668: 4,\n", + " -30849.035447185826: 4,\n", + " -30848.99539556308: 4,\n", + " -30849.51357191893: 4,\n", + " -30849.334778690714: 4,\n", + " -30849.03998468445: 4,\n", + " -30848.143506932443: 4,\n", + " -30848.87797598336: 4,\n", + " -30849.564710820912: 4,\n", + " -30847.816853019078: 4,\n", + " -30848.902155748863: 4,\n", + " -30849.071584450696: 4,\n", + " -30848.92512297501: 4,\n", + " -30849.12174238221: 4,\n", + " -30849.247838856594: 4})" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Counter(abcd.property('energy', query))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Filtering" + ] + }, + { + 
"cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1264" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = {\n", + " \"match\": {\n", + " \"n_atoms\": 114\n", + " }\n", + "}\n", + "abcd.count(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAGdCAYAAACyzRGfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAlpklEQVR4nO3dfXRU1b3/8c+EJJNAmIFEmCQlMZFyCWrxAbkwiA/FaBaXKkqWaC+XUmQVHyJXyG0xWZcHwYdEVgtUG6B1YRQviGbdSktRKMYlvdYkSPCJqgEUmvSGJPZqEkAzoWT//nAxP0dQmWSykxPer7XOWsw+e/b5ntmQfNhzzozLGGMEAABgSVRPFwAAAM4thA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVkX3dAFf1dHRofr6eg0cOFAul6unywEAAGfBGKOjR48qNTVVUVHfvLbR68JHfX290tLSeroMAADQCXV1dRo2bNg39ul14WPgwIGSvije4/H0cDUAAOBstLa2Ki0tLfh7/Jv0uvBx6q0Wj8dD+AAAwGHO5pIJLjgFAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBV0T1dANBVGQXbvrXP4eIpFioBAJwNVj4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWhRU+Tp48qcWLFyszM1Px8fEaPny4HnzwQRljgn2MMVqyZIlSUlIUHx+v7OxsHThwIOKFAwAAZworfDz66KNau3atfvWrX+n999/Xo48+qhUrVujxxx8P9lmxYoUee+wxrVu3TlVVVRowYIBycnLU1tYW8eIBAIDzRIfT+fXXX9fUqVM1ZcoXX0+ekZGhZ599Vrt375b0xarH6tWrtWjRIk2dOlWStGHDBvl8Pm3ZskW33357hMsHAABOE9bKx4QJE1ReXq79+/dLkt5++2299tprmjx5siTp0KFDamhoUHZ2dvA5Xq9X48aNU0VFxRnHDAQCam1tDdkAAEDfFdbKR0FBgVpbW5WVlaV+/frp5MmTevjhhzVjxgxJUkNDgyTJ5/OFPM/n8wX3fVVRUZGWLVvWmdrhcBkF2761z+HiKRYqAQDYFNbKx/PPP6+NGzdq06ZN2rt3r55++mn9/Oc/19NPP93pAgoLC9XS0hLc6urqOj0WAADo/cJa+fjZz36mgoKC4LUb3/ve9/TXv/5VRUVFmjVrlpKTkyVJjY2NSklJCT6vsbFRl1566RnHdLvdcrvdnSwfAAA4TVgrH5999pmiokKf0q9fP3V0dEiSMjMzlZycrPLy8uD+1tZWVVVVye/3R6BcAADgdGGtfNx44416+OGHlZ6erosuukhvvvmmVq5cqTvuuEOS5HK5NH/+fD300EMaMWKE
MjMztXjxYqWmpurmm2/ujvoBAIDDhBU+Hn/8cS1evFj33HOPmpqalJqaqjvvvFNLliwJ9lm4cKGOHz+uuXPnqrm5WRMnTtT27dsVFxcX8eIBAIDzuMyXP560F2htbZXX61VLS4s8Hk9Pl4NuFKm7XbhrBgB6Xji/v/luFwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFgV1ne7oO/jo8oBAN2NlQ8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVWGFj4yMDLlcrtO2vLw8SVJbW5vy8vKUlJSkhIQE5ebmqrGxsVsKBwAAzhRW+HjjjTd05MiR4LZz505J0q233ipJWrBggbZu3aqysjLt2rVL9fX1mjZtWuSrBgAAjhUdTuchQ4aEPC4uLtbw4cN1zTXXqKWlRevXr9emTZs0adIkSVJpaalGjRqlyspKjR8/PnJVAwAAx+r0NR/t7e36r//6L91xxx1yuVyqrq7WiRMnlJ2dHeyTlZWl9PR0VVRUfO04gUBAra2tIRsAAOi7Oh0+tmzZoubmZv34xz+WJDU0NCg2NlaDBg0K6efz+dTQ0PC14xQVFcnr9Qa3tLS0zpYEAAAcoNPhY/369Zo8ebJSU1O7VEBhYaFaWlqCW11dXZfGAwAAvVtY13yc8te//lUvv/yyfvvb3wbbkpOT1d7erubm5pDVj8bGRiUnJ3/tWG63W263uzNlAAAAB+rUykdpaamGDh2qKVOmBNvGjBmjmJgYlZeXB9tqampUW1srv9/f9UoBAECfEPbKR0dHh0pLSzVr1ixFR///p3u9Xs2ZM0f5+flKTEyUx+PRvHnz5Pf7udMFAAAEhR0+Xn75ZdXW1uqOO+44bd+qVasUFRWl3NxcBQIB5eTkaM2aNREpFAAA9A1hh48bbrhBxpgz7ouLi1NJSYlKSkq6XBgAAOib+G4XAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFgV3dMFoG/KKNjWq8aJ1LEOF0+xUAkA9G2sfAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwKO3z87//+r/7t3/5NSUlJio+P1/e+9z3t2bMnuN8YoyVLliglJUXx8fHKzs7WgQMHIlo0AABwrrDCx6effqorr7xSMTExeumll/Tee+/pF7/4hQYPHhzss2LFCj322GNat26dqqqqNGDAAOXk5KitrS3ixQMAAOeJDqfzo48+qrS0NJWWlgbbMjMzg382xmj16tVatGiRpk6dKknasGGDfD6ftmzZottvvz1CZQMAAKcKa+Xj97//va644grdeuutGjp0qC677DI98cQTwf2HDh1SQ0ODsrOzg21er1fjxo1TRUXFGccMBAJqbW0N2QAAQN8VVvj46KOPtHbtWo0YMUI7duzQ3XffrX//93/X008/LUlqaGiQJPl8vpDn+Xy+4L6vKioqktfrDW5paWmdOQ8AAOAQYYWPjo4OXX755XrkkUd02WWXae7cufrJT36idevWdbqAwsJCtbS0BLe6urpOjwUAAHq/sMJHSkqKLrzwwpC2UaNGqba2VpKUnJwsSWpsbAzp09jYGNz3VW63Wx6PJ2QDAAB9V1jh48orr1RNTU1I2/79+3X++edL+uLi0+TkZJWXlwf3t7a2qqqqSn6/PwLlAgAApwvrbpcFCxZowoQJeuSRRzR9+nTt3r1bv/nN
b/Sb3/xGkuRyuTR//nw99NBDGjFihDIzM7V48WKlpqbq5ptv7o76AQCAw4QVPsaOHasXXnhBhYWFWr58uTIzM7V69WrNmDEj2GfhwoU6fvy45s6dq+bmZk2cOFHbt29XXFxcxIsHAADOE1b4kKQf/OAH+sEPfvC1+10ul5YvX67ly5d3qTAAANA38d0uAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsCrsj1eHc2UUbOtV4wAAzk2sfAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsCit8PPDAA3K5XCFbVlZWcH9bW5vy8vKUlJSkhIQE5ebmqrGxMeJFAwAA5wp75eOiiy7SkSNHgttrr70W3LdgwQJt3bpVZWVl2rVrl+rr6zVt2rSIFgwAAJwtOuwnREcrOTn5tPaWlhatX79emzZt0qRJkyRJpaWlGjVqlCorKzV+/PiuVwsAABwv7JWPAwcOKDU1VRdccIFmzJih2tpaSVJ1dbVOnDih7OzsYN+srCylp6eroqLia8cLBAJqbW0N2QAAQN8VVvgYN26cnnrqKW3fvl1r167VoUOHdNVVV+no0aNqaGhQbGysBg0aFPIcn8+nhoaGrx2zqKhIXq83uKWlpXXqRAAAgDOE9bbL5MmTg38ePXq0xo0bp/PPP1/PP/+84uPjO1VAYWGh8vPzg49bW1sJIAAA9GFdutV20KBB+qd/+icdPHhQycnJam9vV3Nzc0ifxsbGM14jcorb7ZbH4wnZAABA39Wl8HHs2DF9+OGHSklJ0ZgxYxQTE6Py8vLg/pqaGtXW1srv93e5UAAA0DeE9bbLT3/6U9144406//zzVV9fr6VLl6pfv3764Q9/KK/Xqzlz5ig/P1+JiYnyeDyaN2+e/H4/d7oAAICgsMLH3/72N/3whz/U//3f/2nIkCGaOHGiKisrNWTIEEnSqlWrFBUVpdzcXAUCAeXk5GjNmjXdUjgAAHCmsMLH5s2bv3F/XFycSkpKVFJS0qWiAABA38V3uwAAAKvC/oRT9E4ZBdt6ugQAAM4KKx8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKuie7oAfLuMgm09XQIc7Gz+/hwunmKhEgD4AisfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArOpS+CguLpbL5dL8+fODbW1tbcrLy1NSUpISEhKUm5urxsbGrtYJAAD6iE6HjzfeeEO//vWvNXr06JD2BQsWaOvWrSorK9OuXbtUX1+vadOmdblQAADQN3QqfBw7dkwzZszQE088ocGDBwfbW1patH79eq1cuVKTJk3SmDFjVFpaqtdff12VlZURKxoAADhXp8JHXl6epkyZouzs7JD26upqnThxIqQ9KytL6enpqqio6FqlAACgT4gO9wmbN2/W3r179cYbb5y2r6GhQbGxsRo0aFBIu8/nU0NDwxnHCwQCCgQCwcetra3hlgQAABwkrPBRV1en++67Tzt37lRcXFxECigqKtKyZcsiMpYTZRRs6+kSzgk2X+ezOdbh4ikWKgGA3imst12qq6vV1NSkyy+/XNHR0YqOjtauXbv02GOPKTo6Wj6fT+3t7Wpubg55XmNjo5KTk884ZmFhoVpaWoJbXV1dp08GAAD0fmGtfFx33XV69913Q9pmz56trKws3X///UpLS1NMTIzKy8uVm5srSaqpqVFtba38fv8Zx3S73XK7
3Z0sHwAAOE1Y4WPgwIG6+OKLQ9oGDBigpKSkYPucOXOUn5+vxMREeTwezZs3T36/X+PHj49c1QAAwLHCvuD026xatUpRUVHKzc1VIBBQTk6O1qxZE+nDAAAAh+py+Hj11VdDHsfFxamkpEQlJSVdHRoAAPRBfLcLAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArIr4d7sAOHdlFGz71j6Hi6dYqARAb8bKBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAquieLgBwkoyCbdbGOVw8JSLH6qsiNRe8zoB9rHwAAACrCB8AAMAqwgcAALAqrPCxdu1ajR49Wh6PRx6PR36/Xy+99FJwf1tbm/Ly8pSUlKSEhATl5uaqsbEx4kUDAADnCit8DBs2TMXFxaqurtaePXs0adIkTZ06VX/5y18kSQsWLNDWrVtVVlamXbt2qb6+XtOmTeuWwgEAgDOFdbfLjTfeGPL44Ycf1tq1a1VZWalhw4Zp/fr12rRpkyZNmiRJKi0t1ahRo1RZWanx48dHrmoAAOBYnb7m4+TJk9q8ebOOHz8uv9+v6upqnThxQtnZ2cE+WVlZSk9PV0VFRUSKBQAAzhf253y8++678vv9amtrU0JCgl544QVdeOGFeuuttxQbG6tBgwaF9Pf5fGpoaPja8QKBgAKBQPBxa2truCUBAAAHCXvlY+TIkXrrrbdUVVWlu+++W7NmzdJ7773X6QKKiork9XqDW1paWqfHAgAAvV/Y4SM2Nlbf/e53NWbMGBUVFemSSy7RL3/5SyUnJ6u9vV3Nzc0h/RsbG5WcnPy14xUWFqqlpSW41dXVhX0SAADAObr8OR8dHR0KBAIaM2aMYmJiVF5eHtxXU1Oj2tpa+f3+r32+2+0O3rp7agMAAH1XWNd8FBYWavLkyUpPT9fRo0e1adMmvfrqq9qxY4e8Xq/mzJmj/Px8JSYmyuPxaN68efL7/dzpAgAAgsIKH01NTfrRj36kI0eOyOv1avTo0dqxY4euv/56SdKqVasUFRWl3NxcBQIB5eTkaM2aNd1SOAAAcKawwsf69eu/cX9cXJxKSkpUUlLSpaIAAEDfxXe7AAAAqwgfAADAqrA/ZAxA35NRsO1b+xwunmKhEgDnAlY+AACAVYQPAABgFeEDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVYQPAABgFeEDAABYRfgAAABWRfd0AQA6L6NgW0+XAABhY+UDAABYRfgAAABWET4AAIBVhA8AAGAV4QMAAFhF+AAAAFYRPgAAgFWEDwAAYBXhAwAAWEX4AAAAVhE+AACAVWGFj6KiIo0dO1YDBw7U0KFDdfPNN6umpiakT1tbm/Ly8pSUlKSEhATl5uaqsbExokUDAADnCit87Nq1S3l5eaqsrNTOnTt14sQJ3XDDDTp+/Hiwz4IFC7R161aVlZVp165dqq+v17Rp0yJeOAAAcKawvtV2+/btIY+feuopDR06VNXV1br66qvV0tKi9evXa9OmTZo0aZIkqbS0VKNGjVJlZaXGjx8fucoBAIAjdemaj5aWFklSYmKiJKm6ulonTpxQdnZ2sE9WVpbS09NVUVFxxjECgYBaW1tDNgAA0HeFtfLxZR0dHZo/f76uvPJKXXzxxZKkhoYGxcbGatCgQSF9fT6fGhoazjhOUVGRli1b1tkyAFiSUbCtTx4LgH2dXvnIy8vTvn37tHnz5i4VUFhYqJaWluBWV1fXpfEAAEDv1qmVj3vvvVd/+MMf9Kc//UnDhg0LticnJ6u9vV3Nzc0hqx+NjY1KTk4+41hut1tut7szZQAAAAcKa+XDGKN7
771XL7zwgl555RVlZmaG7B8zZoxiYmJUXl4ebKupqVFtba38fn9kKgYAAI4W1spHXl6eNm3apN/97ncaOHBg8DoOr9er+Ph4eb1ezZkzR/n5+UpMTJTH49G8efPk9/u50wUAAEgKM3ysXbtWknTttdeGtJeWlurHP/6xJGnVqlWKiopSbm6uAoGAcnJytGbNmogUCwAAnC+s8GGM+dY+cXFxKikpUUlJSaeLAgAAfRff7QIAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrwvp4dQBA52UUbPvWPoeLp1ioJDxOrRu9FysfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACronu6gL4so2BbT5cAB+urf3+ceF5nU/Ph4inWjhXJ40VKpF4jm681eg4rHwAAwCrCBwAAsIrwAQAArAo7fPzpT3/SjTfeqNTUVLlcLm3ZsiVkvzFGS5YsUUpKiuLj45Wdna0DBw5Eql4AAOBwYYeP48eP65JLLlFJSckZ969YsUKPPfaY1q1bp6qqKg0YMEA5OTlqa2vrcrEAAMD5wr7bZfLkyZo8efIZ9xljtHr1ai1atEhTp06VJG3YsEE+n09btmzR7bff3rVqAQCA40X0mo9Dhw6poaFB2dnZwTav16tx48apoqLijM8JBAJqbW0N2QAAQN8V0fDR0NAgSfL5fCHtPp8vuO+rioqK5PV6g1taWlokSwIAAL1Mj9/tUlhYqJaWluBWV1fX0yUBAIBuFNHwkZycLElqbGwMaW9sbAzu+yq32y2PxxOyAQCAviui4SMzM1PJyckqLy8PtrW2tqqqqkp+vz+ShwIAAA4V9t0ux44d08GDB4OPDx06pLfeekuJiYlKT0/X/Pnz9dBDD2nEiBHKzMzU4sWLlZqaqptvvjmSdQMAAIcKO3zs2bNH3//+94OP8/PzJUmzZs3SU089pYULF+r48eOaO3eumpubNXHiRG3fvl1xcXGRqxoAADhW2OHj2muvlTHma/e7XC4tX75cy5cv71JhAACgb+rxu10AAMC5hfABAACsCvttF6fLKNj2rX0OF0+JyDgAzh22fyZE6nhn8/PuXBap3xkIxcoHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArCJ8AAAAqwgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqlzHG9HQRX9ba2iqv16uWlhZ5PJ6Ij59RsC3iYwIAYMPh4inf2udsfs+dzTjhCuf3NysfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArOq28FFSUqKMjAzFxcVp3Lhx2r17d3cdCgAAOEi3hI/nnntO+fn5Wrp0qfbu3atLLrlEOTk5ampq6o7DAQAAB+mW8LFy5Ur95Cc/0ezZs3XhhRdq3bp16t+/v5588snuOBwAAHCQ6EgP2N7erurqahUWFgbboqKilJ2drYqKitP6BwIBBQKB4OOWlhZJX3w1b3foCHzWLeMCANDdzuZ349n8nuuO37GnxjTGfGvfiIePv//97zp58qR8Pl9Iu8/n0wcffHBa/6KiIi1btuy09rS0tEiXBgCAo3lX965xzuTo0aPyer3f2Cfi4SNchYWFys/PDz7u6OjQJ598oqSkJLlcrogco7W1VWlpaaqrq5PH44nImIg85sk5mCvnYK6coS/MkzFGR48eVWpq6rf2jXj4OO+889SvXz81NjaGtDc2Nio5Ofm0/m63W263O6Rt0KBBkS5LkuTxeBw7qecS5sk5mCvnYK6cwenz9G0rHqdE/ILT2NhYjRkzRuXl5cG2
jo4OlZeXy+/3R/pwAADAYbrlbZf8/HzNmjVLV1xxhf75n/9Zq1ev1vHjxzV79uzuOBwAAHCQbgkft912mz7++GMtWbJEDQ0NuvTSS7V9+/bTLkK1xe12a+nSpae9vYPehXlyDubKOZgrZzjX5sllzuaeGAAAgAjhu10AAIBVhA8AAGAV4QMAAFhF+AAAAFY5JnzcdNNNSk9PV1xcnFJSUjRz5kzV19eH9HnnnXd01VVXKS4uTmlpaVqxYsVp46xevVojR45UfHy80tLStGDBArW1tQX3P/DAA3K5XCFbVlZWyBhtbW3Ky8tTUlKSEhISlJube9qHqp3LbM3VlxUXF8vlcmn+/Pkh7R9++KFuueUWDRkyRB6PR9OnT2euvqQ3zVVDQ4Nmzpyp5ORkDRgwQJdffrn++7//O2Ln6mS9ZZ4OHz582s/HU1tZWVlEz9mpestcnVJRUaFJkyZpwIAB8ng8uvrqq/X5559H5Fy7xDjEypUrTUVFhTl8+LD585//bPx+v/H7/cH9LS0txufzmRkzZph9+/aZZ5991sTHx5tf//rXwT4bN240brfbbNy40Rw6dMjs2LHDpKSkmAULFgT7LF261Fx00UXmyJEjwe3jjz8OqeWuu+4yaWlppry83OzZs8eMHz/eTJgwoftfBIewNVen7N6922RkZJjRo0eb++67L9h+7Ngxc8EFF5hbbrnFvPPOO+add94xU6dONWPHjjUnT57s1tfAKXrLXBljzPXXX2/Gjh1rqqqqzIcffmgefPBBExUVZfbu3dtt5+8UvWWe/vGPf4T8bDxy5IhZtmyZSUhIMEePHu3W18ApestcGWPM66+/bjwejykqKjL79u0zH3zwgXnuuedMW1tbt53/2XJM+Piq3/3ud8blcpn29nZjjDFr1qwxgwcPNoFAINjn/vvvNyNHjgw+zsvLM5MmTQoZJz8/31x55ZXBx0uXLjWXXHLJ1x63ubnZxMTEmLKysmDb+++/bySZioqKrp5Wn9Rdc2WMMUePHjUjRowwO3fuNNdcc03IP74dO3aYqKgo09LSEmxrbm42LpfL7Ny5M5Kn2Gf01FwZY8yAAQPMhg0bQtoSExPNE088EYlT61N6cp6+6tJLLzV33HFHF8+o7+rJuRo3bpxZtGhRhM8oMhzztsuXffLJJ9q4caMmTJigmJgYSV8sLV199dWKjY0N9svJyVFNTY0+/fRTSdKECRNUXV2t3bt3S5I++ugjvfjii/qXf/mXkPEPHDig1NRUXXDBBZoxY4Zqa2uD+6qrq3XixAllZ2cH27KyspSenq6KiopuO2en6u65ysvL05QpU0Lm45RAICCXyxXyoT1xcXGKiorSa6+9FvFzdbqenKtT4zz33HP65JNP1NHRoc2bN6utrU3XXnttN5ytc/X0PH1ZdXW13nrrLc2ZMydSp9en9ORcNTU1qaqqSkOHDtWECRPk8/l0zTXX9J6ffT2dfsKxcOFC079/fyPJjB8/3vz9738P7rv++uvN3LlzQ/r/5S9/MZLMe++9F2z75S9/aWJiYkx0dLSRZO66666Q57z44ovm+eefN2+//bbZvn278fv9Jj093bS2thpjvlgOi42NPa22sWPHmoULF0bydB3Nxlw9++yz5uKLLzaff/65McaclvybmpqMx+Mx9913nzl+/Lg5duyYuffee42k045/LusNc2WMMZ9++qm54YYbjCQTHR1tPB6P2bFjR4TP1rl6yzx92d13321GjRoVgbPrW3rDXFVUVBhJJjEx0Tz55JNm7969Zv78+SY2Ntbs37+/G846PD268lFQUPC1Fy+d2j744INg/5/97Gd688039cc//lH9+vXTj370I5kwPqD11Vdf1SOPPKI1a9Zo7969+u1vf6tt27bpwQcfDPaZPHmybr31Vo0ePVo5OTl68cUX1dzcrOeffz6i5+40vW2u6urqdN9992njxo2Ki4s74xhDhgxRWVmZtm7dqoSEBHm9XjU3N+vy
yy9XVJQjF/3OihPnSpIWL16s5uZmvfzyy9qzZ4/y8/M1ffp0vfvuu51/MXoxp87TKZ9//rk2bdp0Tqx6OHGuOjo6JEl33nmnZs+ercsuu0yrVq3SyJEj9eSTT3bh1YiQnkw+TU1N5v333//G7cvvi31ZXV2dkWRef/11Y4wxM2fONFOnTg3p88orrxhJ5pNPPjHGGDNx4kTz05/+NKTPM888Y+Lj47/xAsQrrrjCFBQUGGOMKS8vN5LMp59+GtInPT3drFy5MpzTd5TeNlcvvPCCkWT69esX3CQZl8tl+vXrZ/7xj3+EPPfjjz8OzpnP5zMrVqyIwKvSOzlxrg4ePGgkmX379oWMc91115k777wzQq9M7+LEefqyDRs2mJiYGNPU1BShV6T3cuJcffTRR0aSeeaZZ0LGmT59uvnXf/3XCL0yndctXyx3toYMGaIhQ4Z06rmnUl0gEJAk+f1+/ed//qdOnDgRfG9t586dGjlypAYPHixJ+uyzz077H2+/fv0k6WtT6bFjx/Thhx9q5syZkqQxY8YoJiZG5eXlys3NlSTV1NSotrZWfr+/U+fiBL1trq677rrT/kc8e/ZsZWVl6f777w/2PeW8886TJL3yyitqamrSTTfd1KlzcQInztVnn30mSWcc51RNfY0T5+nL1q9fr5tuuqnT5+AkTpyrjIwMpaamqqamJqTf/v37NXny5E6dS0T1aPQ5S5WVlebxxx83b775pjl8+LApLy83EyZMMMOHDw/eMtTc3Gx8Pp+ZOXOm2bdvn9m8ebPp379/yO1LS5cuNQMHDjTPPvus+eijj8wf//hHM3z4cDN9+vRgn//4j/8wr776qjl06JD585//bLKzs815550Xku7vuusuk56ebl555RWzZ8+e026lOpfZnKuvOtP7008++aSpqKgwBw8eNM8884xJTEw0+fn53XLuTtOb5qq9vd1897vfNVdddZWpqqoyBw8eND//+c+Ny+Uy27Zt67bXwAl60zydcuDAAeNyucxLL70U8fN1st42V6tWrTIej8eUlZWZAwcOmEWLFpm4uDhz8ODBbjn/cDgifLzzzjvm+9//vklMTDRut9tkZGSYu+66y/ztb38L6ff222+biRMnGrfbbb7zne+Y4uLikP0nTpwwDzzwgBk+fLiJi4szaWlp5p577gl5C+W2224zKSkpJjY21nznO98xt91222kT9fnnn5t77rnHDB482PTv39/ccsst5siRI912/k5ic66+6kz/+O6//37j8/lMTEyMGTFihPnFL35hOjo6InW6jtbb5mr//v1m2rRpZujQoaZ///5m9OjRp916ey7qbfNkjDGFhYUmLS2Nz8v5it44V0VFRWbYsGGmf//+xu/3m//5n/+JxKl2mcuYMK6CAQAA6KK+e8k/AADolQgfAADAKsIHAACwivABAACsInwAAACrCB8AAMAqwgcAALCK8AEAAKwifAAAAKsIHwAAwCrCBwAAsIrwAQAArPp/zg8kjDr7iyoAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(-30849.977784286697, -30845.753419331242)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = abcd.property('energy', query)\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.show()\n", + "min(data), max(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "968" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = {\n", + " \"bool\": {\n", + " \"must\": [\n", + " {\n", + " \"range\" : {\n", + " \"energy\" : {\n", + " \"gte\" : -30850,\n", + " \"lte\" : -30848,\n", + " }\n", + " }\n", + " },\n", + " {\n", + " \"match\": {\n", + " \"n_atoms\": 114\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "abcd.count(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'count': 968, 'category': 'info', 'dtype': 'scalar(float)'}" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.count_properties(query)['energy']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fetching the data" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "968" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = {\n", + " \"bool\": {\n", + " \"must\": [\n", + " {\n", + " \"range\" : {\n", + " \"energy\" : {\n", + " \"gte\" : -30850,\n", + " \"lte\" : -30848,\n", + " }\n", + " }\n", + " },\n", + " {\n", + " \"match\": {\n", + " \"n_atoms\": 114\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "abcd.count(query)" + ] + }, + { + "cell_type": 
"code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "968" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traj = list(abcd.get_atoms(query={'query': query}))\n", + "len(traj)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Atoms(symbols='C48H28O32Zr6', pbc=True, cell=[[14.759483662029265, 0.0, 0.0], [7.380258413807584, 12.781786651387147, 0.0], [7.380243655055182, 4.260782501715179, 12.050631347394049]], forces=...)" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traj[0]" + ] + } + ], + "metadata": { + "@webio": { + "lastCommId": null, + "lastKernelId": null + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 5153eb4a67ddd38927639cc8dc846c4a73a9ed31 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 2 Aug 2023 14:56:38 +0000 Subject: [PATCH 008/112] Add functions required for OpenSearch CLI --- abcd/backends/atoms_opensearch.py | 134 ++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 335eb23e..c951d850 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -4,6 +4,11 @@ from typing import Union, Iterable from os import linesep from datetime import datetime +from collections import Counter +from operator import itemgetter + + +import numpy as np from ase import Atoms from ase.io import iread @@ 
-62,6 +67,7 @@ def __init__( self, host="localhost", port=9200, + db="abcd", index_name="atoms", username="admin", password="admin", @@ -91,6 +97,7 @@ def __init__( except ConnectionTimeout: raise abcd.errors.TimeoutError() + self.db = db self.index_name = index_name self.create() @@ -102,6 +109,7 @@ def info(self): return { "host": host, "port": port, + "db": self.db, "index": self.index_name, "number of confs": self.client.count(index=self.index_name)["count"], "type": "opensearch" @@ -391,6 +399,11 @@ def count_properties(self, query=None): return properties + def hist(self, name, query=None, **kwargs): + + data = self.property(name, query) + return histogram(name, data, **kwargs) + def __repr__(self): host = self.client.transport.hosts[0]["host"] port = self.client.transport.hosts[0]["port"] @@ -419,6 +432,127 @@ def __exit__(self, exc_type, exc_val, exc_tb): pass +def histogram(name, data, **kwargs): + if not data: + return None + + elif data and isinstance(data, list): + + ptype = type(data[0]) + + if not all(isinstance(x, ptype) for x in data): + print("Mixed type error of the {} property!".format(name)) + return None + + if ptype == float: + bins = kwargs.get('bins', 10) + return _hist_float(name, data, bins) + + elif ptype == int: + bins = kwargs.get('bins', 10) + return _hist_int(name, data, bins) + + elif ptype == str: + return _hist_str(name, data, **kwargs) + + elif ptype == datetime: + bins = kwargs.get('bins', 10) + return _hist_date(name, data, bins) + + else: + print('{}: Histogram for list of {} types are not supported!'.format(name, type(data[0]))) + logger.info('{}: Histogram for list of {} types are not supported!'.format(name, type(data[0]))) + + else: + logger.info('{}: Histogram for {} types are not supported!'.format(name, type(data))) + return None + + +def _hist_float(name, data, bins=10): + data = np.array(data) + hist, bin_edges = np.histogram(data, bins=bins) + + return { + 'type': 'hist_float', + 'name': name, + 'bins': bins, + 
'edges': bin_edges, + 'counts': hist, + 'min': data.min(), + 'max': data.max(), + 'median': data.mean(), + 'std': data.std(), + 'var': data.var() + } + + +def _hist_date(name, data, bins=10): + hist_data = np.array([t.timestamp() for t in data]) + hist, bin_edges = np.histogram(hist_data, bins=bins) + + fromtimestamp = datetime.fromtimestamp + + return { + 'type': 'hist_date', + 'name': name, + 'bins': bins, + 'edges': [fromtimestamp(d) for d in bin_edges], + 'counts': hist, + 'min': fromtimestamp(hist_data.min()), + 'max': fromtimestamp(hist_data.max()), + 'median': fromtimestamp(hist_data.mean()), + 'std': fromtimestamp(hist_data.std()), + 'var': fromtimestamp(hist_data.var()) + } + + +def _hist_int(name, data, bins=10): + data = np.array(data) + delta = max(data) - min(data) + 1 + + if bins > delta: + bins = delta + + hist, bin_edges = np.histogram(data, bins=bins) + + return { + 'type': 'hist_int', + 'name': name, + 'bins': bins, + 'edges': bin_edges, + 'counts': hist, + 'min': data.min(), + 'max': data.max(), + 'median': data.mean(), + 'std': data.std(), + 'var': data.var() + } + + +def _hist_str(name, data, bins=10, truncate=20): + n_unique = len(set(data)) + + if truncate: + # data = (item[:truncate] for item in data) + data = (item[:truncate] + '...' 
if len(item) > truncate else item for item in data) + + data = Counter(data) + + if bins: + labels, counts = zip(*sorted(data.items(), key=itemgetter(1, 0), reverse=True)) + else: + labels, counts = zip(*data.items()) + + return { + 'type': 'hist_str', + 'name': name, + 'total': sum(data.values()), + 'unique': n_unique, + 'labels': labels[:bins], + 'counts': counts[:bins] + } + + if __name__ == "__main__": db = OpenSearchDatabase(username="admin", password="admin") print(db.info()) From 74986beb2eee81e3bd95c4146415ad87170cda0f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 3 Aug 2023 16:02:28 +0000 Subject: [PATCH 009/112] Add function to add properties --- abcd/backends/atoms_opensearch.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index c951d850..35a6bcfa 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -399,6 +399,29 @@ def count_properties(self, query=None): return properties + def add_property(self, data, query=None): + logger.info('add: data={}, query={}'.format(data, query)) + + script_txt = "ctx._source.derived.info_keys.addAll(params.keys);" + for key, val in data.items(): + script_txt += f"ctx._source.{key} = '{val}';" + + body = { + "script": { + "source": script_txt, + "lang": "painless", + "params" : { + "keys" : list(data.keys()) + }, + }, + "query": query + } + + self.client.update_by_query( + index=self.index_name, + body=body, + ) + def hist(self, name, query=None, **kwargs): data = self.property(name, query) From d03fa68d156669492ab152aad08d34f69b46d690 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 3 Aug 2023 17:46:16 +0000 Subject: [PATCH 010/112] Add function to rename properties --- abcd/backends/atoms_opensearch.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 
35a6bcfa..8b49797b 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -422,6 +422,33 @@ def add_property(self, data, query=None): body=body, ) + def rename_property(self, name, new_name, query=None): + logger.info('rename: query={}, old={}, new={}'.format(query, name, new_name)) + + script_txt = f"if (!ctx._source.containsKey('{new_name}')) {{ " + script_txt += f"ctx._source.{new_name} = ctx._source.{name}; ctx._source.remove('params.name');" + + script_txt += f"for (int i=0; i Date: Thu, 3 Aug 2023 17:54:50 +0000 Subject: [PATCH 011/112] Add function to delete properties --- abcd/backends/atoms_opensearch.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 8b49797b..d03dde47 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -449,6 +449,31 @@ def rename_property(self, name, new_name, query=None): body=body ) + def delete_property(self, name, query=None): + logger.info('delete: query={}, porperty={}'.format(name, query)) + + script_txt = f"if (ctx._source.containsKey('{name}')) {{ " + script_txt += f"ctx._source.remove('params.name');" + script_txt += f"for (int i=0; i Date: Tue, 8 Aug 2023 17:08:29 +0000 Subject: [PATCH 012/112] Add luqum query parsing for OpenSearch --- abcd/backends/atoms_opensearch.py | 42 +++++++++++++++++++++++++------ pyproject.toml | 1 + 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index d03dde47..5c50f073 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -16,12 +16,16 @@ import abcd.errors from abcd.model import AbstractModel from abcd.database import AbstractABCD +from abcd.queryset import AbstractQuerySet from abcd.parsers import extras from pathlib import Path from opensearchpy import OpenSearch, helpers, AuthenticationException, 
ConnectionTimeout +from luqum.parser import parser +from luqum.elasticsearch import SchemaAnalyzer, ElasticsearchQueryBuilder + logger = logging.getLogger(__name__) map_types = { @@ -34,6 +38,25 @@ } +class OpenSearchQuery(AbstractQuerySet): + + def __init__(self, client, index_name): + schema = client.indices.get_mapping()[index_name] + schema_analizer = SchemaAnalyzer(schema) + self.message_es_builder = ElasticsearchQueryBuilder(**schema_analizer.query_builder_options()) + + def __call__(self, ast): + logger.info('parsed ast: {}'.format(ast)) + + if isinstance(ast, dict): + return ast + elif isinstance(ast, str): + tree = parser.parse(ast) + return self.message_es_builder(tree) + + return ast if ast else None + + class AtomsModel(AbstractModel): def __init__(self, client=None, index_name=None, dict=None): super().__init__(dict) @@ -100,6 +123,7 @@ def __init__( self.db = db self.index_name = index_name self.create() + self.parser = OpenSearchQuery(self.client, self.index_name) def info(self): host = self.client.transport.hosts[0]["host"] @@ -116,7 +140,7 @@ def info(self): } def delete(self, query=None): - # query = parser(query) + query = self.parser(query) if not query: query = { "match_all": {} @@ -173,7 +197,7 @@ def upload(self, file: Path, extra_infos=None, store_calc=True): self.push(data, extra_info, store_calc=store_calc) def get_atoms(self, query=None): - # query = parser(query) + query = self.parser(query) if not query: query = { "query": { @@ -189,7 +213,7 @@ def get_atoms(self, query=None): yield AtomsModel(None, None, hit["_source"]).to_ase() def count(self, query=None): - # query = parser(query) + query = self.parser(query) logger.info("query; {}".format(query)) if not query: @@ -201,7 +225,7 @@ def count(self, query=None): # Slow - use count_property where possible! 
def property(self, name, query=None): - # query = parser(query) + query = self.parser(query) if not query: query = { "match_all": {} @@ -220,7 +244,7 @@ def property(self, name, query=None): )] def count_property(self, name, query=None): - # query = parser(query) + query = self.parser(query) if not query: query = { "match_all": {} @@ -250,7 +274,7 @@ def count_property(self, name, query=None): return prop def properties(self, query=None): - # query = parser(query) + query = self.parser(query) if not query: query = { "match_all": {} @@ -339,7 +363,7 @@ def get_type_of_property(self, prop, category): return "scalar({})".format(map_types[type(data)]) def count_properties(self, query=None): - # query = parser(query) + query = self.parser(query) if not query: query = { "match_all": {} @@ -401,6 +425,7 @@ def count_properties(self, query=None): def add_property(self, data, query=None): logger.info('add: data={}, query={}'.format(data, query)) + query = self.parser(query) script_txt = "ctx._source.derived.info_keys.addAll(params.keys);" for key, val in data.items(): @@ -424,6 +449,7 @@ def add_property(self, data, query=None): def rename_property(self, name, new_name, query=None): logger.info('rename: query={}, old={}, new={}'.format(query, name, new_name)) + query = self.parser(query) script_txt = f"if (!ctx._source.containsKey('{new_name}')) {{ " script_txt += f"ctx._source.{new_name} = ctx._source.{name}; ctx._source.remove('params.name');" @@ -451,6 +477,7 @@ def rename_property(self, name, new_name, query=None): def delete_property(self, name, query=None): logger.info('delete: query={}, porperty={}'.format(name, query)) + query = self.parser(query) script_txt = f"if (ctx._source.containsKey('{name}')) {{ " script_txt += f"ctx._source.remove('params.name');" @@ -475,6 +502,7 @@ def delete_property(self, name, query=None): ) def hist(self, name, query=None, **kwargs): + query = self.parser(query) data = self.property(name, query) return histogram(name, data, **kwargs) 
diff --git a/pyproject.toml b/pyproject.toml index 774722fa..0b34115a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ documentation = "https://libatoms.github.io/abcd/" [tool.poetry.dependencies] ase = "^3.23" lark = "^1.1.9" +luqum = "^0.13.0" matplotlib = "^3.9" notebook = "^7.2" numpy = "^1.26" From 1d4aeff1058762b641a5a797698ef5565d4b16c8 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 9 Aug 2023 12:30:43 +0000 Subject: [PATCH 013/112] Fix unit tests for OpenSearch Made schema analysis optional for query building, primarily since get_mapping is not supported by openmock, although it may be useful in itself. --- abcd/backends/atoms_opensearch.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 5c50f073..f6117ab7 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -40,10 +40,13 @@ class OpenSearchQuery(AbstractQuerySet): - def __init__(self, client, index_name): - schema = client.indices.get_mapping()[index_name] - schema_analizer = SchemaAnalyzer(schema) - self.message_es_builder = ElasticsearchQueryBuilder(**schema_analizer.query_builder_options()) + def __init__(self, client, index_name, analyse_schema=True): + if analyse_schema: + schema = client.indices.get_mapping()[index_name] + schema_analizer = SchemaAnalyzer(schema) + self.query_builder = ElasticsearchQueryBuilder(**schema_analizer.query_builder_options()) + else: + self.query_builder = ElasticsearchQueryBuilder() def __call__(self, ast): logger.info('parsed ast: {}'.format(ast)) @@ -52,7 +55,7 @@ def __call__(self, ast): return ast elif isinstance(ast, str): tree = parser.parse(ast) - return self.message_es_builder(tree) + return self.query_builder(tree) return ast if ast else None @@ -94,6 +97,7 @@ def __init__( index_name="atoms", username="admin", password="admin", + analyse_schema=True, **kwargs): super().__init__() @@ -123,7 
+127,7 @@ def __init__( self.db = db self.index_name = index_name self.create() - self.parser = OpenSearchQuery(self.client, self.index_name) + self.parser = OpenSearchQuery(self.client, self.index_name, analyse_schema) def info(self): host = self.client.transport.hosts[0]["host"] From a5231d250a6a95cdf04da6e3d9695664b56fb7a6 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Tue, 15 Aug 2023 10:39:28 +0000 Subject: [PATCH 014/112] Refactor default OpenSearch query --- abcd/backends/atoms_opensearch.py | 76 ++++++++++++------------------- 1 file changed, 30 insertions(+), 46 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index f6117ab7..24c5138b 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -48,16 +48,25 @@ def __init__(self, client, index_name, analyse_schema=True): else: self.query_builder = ElasticsearchQueryBuilder() - def __call__(self, ast): - logger.info('parsed ast: {}'.format(ast)) + def __call__(self, query): + logger.info('parsed query: {}'.format(query)) - if isinstance(ast, dict): - return ast - elif isinstance(ast, str): - tree = parser.parse(ast) + if not query: + query=self.get_default_query() + + if isinstance(query, dict): + return query + elif isinstance(query, str): + tree = parser.parse(query) return self.query_builder(tree) - return ast if ast else None + return query if query else None + + @staticmethod + def get_default_query(): + return { + "match_all": {} + } class AtomsModel(AbstractModel): @@ -145,16 +154,13 @@ def info(self): def delete(self, query=None): query = self.parser(query) - if not query: - query = { - "match_all": {} - } + body = { + "query": query + } self.client.delete_by_query( index=self.index_name, - body={ - "query": query, - }, + body=body, ) def destroy(self): @@ -202,12 +208,9 @@ def upload(self, file: Path, extra_infos=None, store_calc=True): def get_atoms(self, query=None): query = self.parser(query) - if not query: - query 
= { - "query": { - "match_all": {} - } - } + query = { + "query": query, + } for hit in helpers.scan( self.client, @@ -217,42 +220,31 @@ def get_atoms(self, query=None): yield AtomsModel(None, None, hit["_source"]).to_ase() def count(self, query=None): - query = self.parser(query) logger.info("query; {}".format(query)) + query = self.parser(query) + body = { + "query": query, + } - if not query: - query = { - "match_all": {} - } - - return self.client.count(index=self.index_name, body={"query": query})["count"] + return self.client.count(index=self.index_name, body=body)["count"] # Slow - use count_property where possible! def property(self, name, query=None): query = self.parser(query) - if not query: - query = { - "match_all": {} - } - - body = { + query = { "query": query, } return [hit["_source"][format(name)] for hit in helpers.scan( self.client, index=self.index_name, - query=body, + query=query, stored_fields=format(name), _source=format(name), )] def count_property(self, name, query=None): query = self.parser(query) - if not query: - query = { - "match_all": {} - } body = { "size" : 0, @@ -279,10 +271,6 @@ def count_property(self, name, query=None): def properties(self, query=None): query = self.parser(query) - if not query: - query = { - "match_all": {} - } properties = {} @@ -368,10 +356,6 @@ def get_type_of_property(self, prop, category): def count_properties(self, query=None): query = self.parser(query) - if not query: - query = { - "match_all": {} - } properties = {} From c6853de5a6bf32d63e2a11ae995d7bcc9f0ac1f4 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Tue, 15 Aug 2023 15:23:55 +0000 Subject: [PATCH 015/112] Add function to return data as dictionary --- abcd/backends/atoms_opensearch.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 24c5138b..f21e9a65 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -206,6 
+206,19 @@ def upload(self, file: Path, extra_infos=None, store_calc=True): data = iread(str(file)) self.push(data, extra_info, store_calc=store_calc) + def get_items(self, query=None): + query = self.parser(query) + query = { + "query": query, + } + + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + ): + yield {'_id': hit['_id'], **hit['_source']} + def get_atoms(self, query=None): query = self.parser(query) query = { From 671c8bed9597596dd79f3deeaf3ea5c7fdf25427 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Tue, 15 Aug 2023 17:03:44 +0000 Subject: [PATCH 016/112] Update OpenSearch useage tutorial --- tutorials/abcd_opensearch_usage.ipynb | 409 +++++++++++++++++++++++++- 1 file changed, 408 insertions(+), 1 deletion(-) diff --git a/tutorials/abcd_opensearch_usage.ipynb b/tutorials/abcd_opensearch_usage.ipynb index 0e928ed8..f1de01ce 100644 --- a/tutorials/abcd_opensearch_usage.ipynb +++ b/tutorials/abcd_opensearch_usage.ipynb @@ -1299,7 +1299,7 @@ } ], "source": [ - "traj = list(abcd.get_atoms(query={'query': query}))\n", + "traj = list(abcd.get_atoms(query=query))\n", "len(traj)" ] }, @@ -1324,6 +1324,413 @@ "source": [ "traj[0]" ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "242" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = list(abcd.get_items(query=query))\n", + "len(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'_id': '2UDM2okBtksDlC5r8IGq',\n", + " 'n_atoms': 114,\n", + " 'cell': [[14.759483662029265, 0.0, 0.0],\n", + " [7.380258413807584, 12.781786651387147, 0.0],\n", + " [7.380243655055182, 4.260782501715179, 12.050631347394049]],\n", + " 'pbc': [True, True, True],\n", + " 'formula': 'C48H28O32Zr6',\n", + " 'numbers': [1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 
1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 40,\n", + " 40,\n", + " 40,\n", + " 40,\n", + " 40,\n", + " 40],\n", + " 'positions': [[17.166810638040264, 11.566799628342661, 2.3959431306453296],\n", + " [10.391931260040497, 9.232075241735581, 8.799170748954813],\n", + " [15.152442318761134, 3.2144705981189303, 0.6236271192356346],\n", + " [15.428455018627362, 13.198368239182761, 6.757442369774353],\n", + " [20.968952462595865, 8.354501228588285, 5.937790321351722],\n", + " [12.821718368988067, 11.860905590260213, 0.764468940894911],\n", + " [20.164574198879585, 13.449131931085539, 8.500504258460039],\n", + " [5.203325638335655, 4.037525599970674, 3.6535544413570706],\n", + " [6.476452578322519, 9.882112891744764, 0.7336632917566172],\n", + " [14.332783438660714, 4.5739237510789845, 5.763830060388294],\n", + " [12.20845295758527, 7.975607890442319, 2.7181563401019804],\n", + " [26.126453831046035, 
15.25865575215541, 8.681035572143871],\n", + " [7.431639790543854, 7.68880010777489, 3.739705967641281],\n", + " [21.52510600020679, 15.432405681052952, 8.675468268048236],\n", + " [11.49107468172553, 9.60164215963523, 0.7009214784567679],\n", + " [18.70674083756121, 4.607625571215378, 5.677858158016438],\n", + " [17.34676875755316, 10.130528920703508, 4.483049170020872],\n", + " [2.9330861621787743, 3.3990818416373494, 0.720770788622487],\n", + " [22.67189915206641, 9.23882668038352, 8.661796350384211],\n", + " [15.54501705742674, 14.931708899088871, 4.905010140501105],\n", + " [20.180891240581246, 11.991517760259551, 0.36399634878062614],\n", + " [13.537900990107627, 8.71222318139275, 4.81955270950513],\n", + " [13.02082403030889, 3.7798366294145125, 3.8744821907763676],\n", + " [12.738608267554484, 13.15125952920471, 8.605595280531846],\n", + " [9.30512423974256, 4.003262597986021, 2.08391144947309],\n", + " [13.66172535110934, 6.786427797477926, 9.451058899918706],\n", + " [19.297496722626608, 6.66303528741421, 9.65403361924748],\n", + " [16.364750768476505, 11.479160632545504, 9.561987519221761],\n", + " [16.965060879285595, 12.039276627942046, 3.4048076978088133],\n", + " [9.872075532499599, 8.868306222192839, 9.697764141289875],\n", + " [23.093789800187132, 7.8655671087878325, 11.819108411864843],\n", + " [15.963054660441902, 12.957563889928995, 5.796919712452191],\n", + " [20.789096532494103, 7.4060484208004835, 5.418655862348822],\n", + " [20.271838371834924, 15.445983071791856, 11.893743962525676],\n", + " [20.235785459686173, 13.946169611391733, 9.533785527794883],\n", + " [5.468638782470736, 4.963675408702813, 4.207234520017469],\n", + " [13.30165782031905, 14.29182491089219, 11.848895929341783],\n", + " [13.860085269446175, 5.316906102226919, 5.113333629612867],\n", + " [12.690982408563865, 7.210540402496312, 3.452020777408105],\n", + " [26.695527891724396, 14.913992347710204, 9.505586002333807],\n", + " [6.71805210100351, 7.061748291218562, 
4.294338782243351],\n", + " [20.991757417986378, 15.139143269943215, 9.631524849633491],\n", + " [19.51051107137078, 14.241804768445284, 11.863723702327984],\n", + " [19.467115888926717, 5.338714780974876, 5.342773829476735],\n", + " [17.04442989892783, 11.179573262577135, 4.5377135674632525],\n", + " [9.75214492091448, 7.909665520118783, 11.856922309351365],\n", + " [23.22825354947115, 8.805217731024735, 9.58442235219589],\n", + " [16.00517946832365, 13.88422938185283, 4.718010810191537],\n", + " [27.480789782170447, 15.480087960917762, 11.617172959535212],\n", + " [13.389347998136012, 7.596486021826197, 4.594226402069221],\n", + " [13.123855701056296, 4.872440299211145, 4.040302647886903],\n", + " [12.64753966896389, 13.717491998222464, 9.492966517597923],\n", + " [20.97530038245366, 8.335251317448973, 10.600423519640026],\n", + " [16.502636637988203, 10.85079294806207, 6.900496977690235],\n", + " [9.019607067373974, 1.3080109208445687, 2.1730394493971033],\n", + " [12.07299373071628, 8.404520741314311, 10.81235772773731],\n", + " [18.747600350981866, 12.258841111031488, 10.56065092033455],\n", + " [7.015172452108002, 5.4262894076561405, 2.347590240376888],\n", + " [18.78251160034123, 7.0857013283180255, 6.946566060828641],\n", + " [14.224469007695566, 4.466485050187827, 10.705760127563483],\n", + " [11.603451262170989, 5.349451807744306, 2.0915175410667244],\n", + " [18.543838401175798, 4.375079148751448, 10.585231910862703],\n", + " [14.160667831243819, 12.098710870007285, 10.795787760496086],\n", + " [14.45018544044642, 7.011509947840652, 6.832073401261807],\n", + " [16.477007379156557, 13.350335720646678, 3.479800617715256],\n", + " [10.582432902092235, 8.377782469927169, 10.803729958995012],\n", + " [22.44315777833504, 8.370354822180708, 10.648120637633818],\n", + " [16.567659589367246, 11.689973911016514, 5.702162918040675],\n", + " [19.752709342190204, 6.556676208576273, 5.912505297587319],\n", + " [20.937902942140084, 15.937436540790538, 
10.817725992408736],\n", + " [19.488886710337034, 13.540165709815675, 10.668618978356523],\n", + " [6.371529009087222, 5.8630806488390075, 3.652784530849877],\n", + " [13.287113670442396, 13.309066380643525, 10.810269250499239],\n", + " [13.887372022428522, 6.66488149144997, 5.501186858702026],\n", + " [12.488596287754628, 5.853559681882998, 3.200679859004045],\n", + " [26.74856971012843, 15.90075025114669, 10.529236108850316],\n", + " [10.08315746894864, 1.501050200458931, 1.573941101482483],\n", + " [12.62721678223712, 8.955703669760464, 9.832552292079287],\n", + " [20.39919235012287, 7.4641321602901884, 11.35141521129136],\n", + " [15.34144808796582, 10.482235049392207, 7.284668181164747],\n", + " [19.016384691239136, 8.259448068479994, 7.407999661220768],\n", + " [13.965549620748263, 5.310837048175544, 11.629395661906845],\n", + " [18.964757008970206, 11.57454425853002, 9.498296919639932],\n", + " [6.724169659438005, 4.266531959190234, 1.8722045706136907],\n", + " [14.989309002358754, 11.848517668179902, 11.724244051601735],\n", + " [15.200359309885917, 6.086405481062033, 7.400205812261053],\n", + " [10.683892382055284, 6.184519761996729, 1.7395703404079805],\n", + " [17.662435367568307, 4.527759521878119, 9.64210249881064],\n", + " [7.857986041984485, 6.202923889200398, 1.7950802519056281],\n", + " [14.981497208999828, 4.615121609824532, 9.745747168438765],\n", + " [17.887354642477394, 12.0129465321045, 11.494403179921116],\n", + " [17.806453279094754, 6.309886859779258, 7.194991281595532],\n", + " [17.65884499015476, 10.488278242553303, 7.416379002429795],\n", + " [12.548166448986402, 7.693791677648976, 11.776895469518676],\n", + " [20.34393939120336, 9.206817729894354, 9.922408032378584],\n", + " [7.857463468905509, 1.74909233925955, 1.8274196418121598],\n", + " [18.857763430781485, 5.182306608404846, 11.489619301518633],\n", + " [14.163844810128705, 8.142657329873755, 7.360230728587728],\n", + " [11.792894920225645, 4.155462409412462, 
1.7001921686281716],\n", + " [13.835964747759567, 11.264146337910587, 9.884038668157677],\n", + " [18.442882359243793, 7.121400257955315, 9.800715117804442],\n", + " [16.392129968717537, 10.582139510371382, 9.857218178507331],\n", + " [9.302433061412287, 3.9652007015997683, 1.0673996577713942],\n", + " [14.478398449342821, 7.162072897735833, 9.816748197723141],\n", + " [15.11113884153613, 9.160319431120458, 11.201390619831347],\n", + " [16.44457564782678, 6.551160139418165, 11.181147839950713],\n", + " [16.50639593935091, 8.350138210113135, 8.738223430491407],\n", + " [18.033642210935266, 9.122422759734661, 11.23846663261546],\n", + " [10.893906920444666, 2.90658841770915, -0.019051977695474183],\n", + " [14.757549238860731, 9.308555112561303, 9.214183379238824],\n", + " [18.225880348003468, 9.344784918068, 9.180851999025142],\n", + " [7.388359137923477, 3.030521667505481, 0.14692076074094484],\n", + " [9.220983449272417, 6.075785767588413, 0.00657883162539968],\n", + " [16.440728831136994, 6.2934328881825925, 9.187973671216682]],\n", + " 'forces': [[0.03505596579759337, 0.7596797943958487, 0.9211044616269563],\n", + " [0.16925367694563342, -0.01943702713953078, 0.38893903196958485],\n", + " [0.01574595116377608, 0.17132290092535438, -0.1999552221020049],\n", + " [0.6313750521363777, 0.05251478601615336, -0.8064430222079316],\n", + " [-0.09833287623511343, -0.138000887230052, -0.08874934559146055],\n", + " [0.19781246456634455, -0.9287673780647797, -1.0439826331463689],\n", + " [0.07987955323902354, 0.3227860853196942, 1.7840037712935266],\n", + " [0.3716884711227413, 0.4696845328184121, 0.370453313071228],\n", + " [-0.14715298673081575, -0.28619517081945, -0.2515490388965677],\n", + " [-0.019879256916508915, 0.1586797572898179, 0.03203954734206577],\n", + " [0.9120773177492224, -1.200046035662623, 1.3240873743396222],\n", + " [-0.5694124897336902, -0.12506360937075797, -1.3829512429794373],\n", + " [-0.0348132536409263, 0.07054439117941769, 
0.5526864339711696],\n", + " [-0.41634750794948194, 0.12174380071939654, 1.3701661744387312],\n", + " [1.1617998448227365, 0.7015918847484289, -1.3710321220485349],\n", + " [0.19070902021846559, 0.8070636865577138, 0.18004202662121627],\n", + " [0.22735084256512936, 0.16909786808234928, -0.1733360748519467],\n", + " [-0.4446707825029187, -0.3087082660123333, -0.10186968600959667],\n", + " [1.0559542045857038, -0.7522446777152361, 1.3649098107407422],\n", + " [0.7762546692811232, -1.6189836731314526, -0.7746117342375823],\n", + " [0.028996903632322036, -0.4549212573567351, -0.15349589870642655],\n", + " [-0.30669869163043734, -1.474741689618629, -0.22373535702799768],\n", + " [-0.0288004713360364, 0.7124475973319003, -0.205686211479239],\n", + " [-0.009475030082091964, -0.9498180296696098, -0.7324728929189461],\n", + " [0.024624999488289372, -0.06313755663570486, -1.801891965772292],\n", + " [-0.06351139506345264, -0.159376012078223, -0.050473844173332186],\n", + " [-0.27379062557521333, 0.16125086064434194, -0.24263708044911655],\n", + " [0.16200265126520474, 1.5060489867253262, -0.5510645819751359],\n", + " [0.11499568285511277, -1.536591123473717, -0.46194088373525655],\n", + " [-0.3656006125998993, -0.17292778363924827, -1.3654302020596978],\n", + " [1.3289642431824835, 0.5107142001499065, -0.8944297785444403],\n", + " [-0.025697149587125892, 1.0899791579381328, 0.0022805686754628165],\n", + " [-0.7954947499036974, 0.11066234526139797, 1.0548332035231311],\n", + " [-1.8548078439117515, 0.38714131650410166, 2.989239836187592],\n", + " [0.4506861359111631, 1.1364487657471294, -0.6025277867192889],\n", + " [-1.2287668027946692, -0.09796263735206197, 0.2020090194615987],\n", + " [1.0444392611021323, -2.684139342402327, -1.0437898003947796],\n", + " [-0.07301933526848252, -0.6708481734970433, 0.9851917838418224],\n", + " [-0.8675355232354481, 0.9083975546282277, -1.1112478391074265],\n", + " [0.1290364782741685, 1.0164497157641128, 0.008965437397225301],\n", + " 
[0.2684093062541945, -0.321198686108611, -1.4872871313266933],\n", + " [-0.643392389215423, -0.898736890883939, -1.098486424717582],\n", + " [-0.03591728542135892, 0.6514399427150831, 0.9283400606872579],\n", + " [0.8447910287406576, -1.5317872739661245, -1.3569157361908561],\n", + " [0.17489879147082638, 0.8299521628403577, -1.920810638125738],\n", + " [0.3438521635446514, 0.034021868028403804, 0.7196574853586566],\n", + " [-0.49089716393457716, 1.32019318119886, -0.94368029174169],\n", + " [-0.8017672136473943, -0.4383576953262203, 0.7052387377464858],\n", + " [-0.5808395014825368, 3.6184937280564875, 2.299079591174048],\n", + " [-0.046264433761305436, 2.3374147421900013, 0.42705255387637825],\n", + " [-0.40195909935689456, 0.5967109224900137, -0.9082941962733758],\n", + " [0.545840099958743, 0.6951430433146117, 4.287385291900738],\n", + " [-0.6422225371891145, 0.3670378593750782, 0.6300133957998216],\n", + " [1.0342078124131395, -1.450620626387826, 0.9064692271123347],\n", + " [-1.6097744951362183, 0.26370058757071924, 1.6795418988818989],\n", + " [-1.71670514098886, -1.4817345765222634, 0.997617412133566],\n", + " [-0.36574665127043626, 0.3369549216858052, -0.5080099136416182],\n", + " [0.25302742332541905, -0.05521598720041631, 0.07509935788229247],\n", + " [0.4694140527455162, -0.5513201496485756, -0.6341384740218201],\n", + " [-0.9458055657745397, -0.43950337898099623, 2.0890692980731704],\n", + " [-0.10420990428288447, 1.1152443621616988, 0.8470916520222858],\n", + " [0.303680730512897, -1.6201972339147879, -2.1682659662612322],\n", + " [-1.573935371256837, -0.7716241121396255, 0.4766830761494265],\n", + " [0.8572325978731995, -0.38723439044553537, 0.7948401469896356],\n", + " [-0.2134458014031925, 0.03674209537749023, 0.28632115488382165],\n", + " [-2.3274044083894974, 0.6730330971277177, -0.3481623612081755],\n", + " [0.898834592811411, -1.0161931196493315, 0.970241846276607],\n", + " [-1.0154469854558483, 0.8937227251219462, 1.1335331345430992],\n", + " 
[-0.24175827732254024, 1.3876321937466838, 0.534561183763117],\n", + " [0.6898959355886991, -1.1971756558776325, -2.6977317373963534],\n", + " [0.3469128049778768, -0.8706239325849023, -0.7535286867304852],\n", + " [0.5164133078455445, -1.1737287359463862, -0.8774512404325054],\n", + " [0.07070174270471455, 2.0437350895055317, -2.2527981881306025],\n", + " [0.3288256070994712, -0.3856912142121501, -0.4543299035851946],\n", + " [0.46220673582211436, -1.5513682828935087, 0.2903063650833339],\n", + " [-0.38469619721394926, -2.2885375531998284, -0.3758814264732991],\n", + " [1.9752635218505312, 0.3340079230208498, -1.0452476159967947],\n", + " [0.8562020996487061, 0.4789574741768384, -0.4695076409076208],\n", + " [-0.07918329845061843, -0.20133898992740973, 0.4222739011816598],\n", + " [0.4557039012178522, 0.3706605440016026, 0.8159874720805957],\n", + " [-0.05972518826358055, -0.10986736010417913, 0.6435188875004655],\n", + " [0.0024615743516213087, 0.5677505285253259, 0.5866964749241426],\n", + " [-0.5868764521589592, -0.8929930459899325, 1.0299773189536057],\n", + " [-0.19385399384171936, 0.13704546522351088, 0.0855992297614977],\n", + " [0.5114222820137425, 0.9816344052405028, 0.021354041800663897],\n", + " [-1.066992979728017, 1.3317744591489784, -0.48617301863097473],\n", + " [0.22686850357582064, -0.6558709822362698, -0.6488395287823175],\n", + " [0.7797446849745541, 0.08798264257114148, 0.9232755012995165],\n", + " [0.23828317402856555, 0.1996127511351823, 0.16316684686413324],\n", + " [1.8341906803324939, 0.7465810512458905, -1.8839816394548647],\n", + " [0.25763432631605526, -0.3878936213456354, -0.15371958469827013],\n", + " [-0.09513905164812367, 0.4332483987401102, 1.2322542473847458],\n", + " [-1.7633197590270517, 0.7325978485419761, -0.6597173528546378],\n", + " [1.5710907025053915, 0.6496761658138797, -0.2693559865093303],\n", + " [0.13329885341529846, -0.16092278785630468, -0.8911017423623445],\n", + " [0.4560890525003599, -0.12238914766138208, 
0.02436686071149516],\n", + " [0.04387896406897804, 0.4783136698968656, 0.7284398601970398],\n", + " [-0.19204033753543812, -0.5498325092476479, -0.4268478940493581],\n", + " [-0.04005727604281352, -0.4718303756780979, -0.6626314413966554],\n", + " [0.5963838781433183, -0.31271095971465634, -1.0048920919648563],\n", + " [0.2392545368759047, -0.07763909377589129, -0.3403374652590171],\n", + " [1.0458302280169298, -0.7758582051438556, -0.16412124042933254],\n", + " [-1.0646861857983383, 0.17288613176490497, 1.4279167553260372],\n", + " [0.310471014472195, 0.4872791072940909, 0.18266249516014715],\n", + " [-7.301933526848252e-05, -1.2856617204855898, 1.537077576228393],\n", + " [0.23109436904931635, 0.627365159344662, 1.1682219467816664],\n", + " [0.06169413921247506, -0.7133752514222126, 0.5373045510423942],\n", + " [-0.3402017110018982, -0.024654310066530946, -0.18435993759480393],\n", + " [-0.14411137146241382, 1.2003674235819386, 0.056440860838511554],\n", + " [-1.1119034704628301, 0.02676570014126608, -0.4762907257775261],\n", + " [0.753222725431297, 0.3270011521591009, -0.33560457820400924],\n", + " [-0.7786571082555904, 0.413619053069661, -1.222248027349609],\n", + " [-0.5719985054876705, -0.1103018765710937, 0.8759049788750947],\n", + " [-0.24311736255574165, -0.40464795924505575, -1.3254814265784451]],\n", + " 'energy': -30848.841105643754,\n", + " 'volume': 2273.382588904185,\n", + " 'elements': {'1': 28, '6': 48, '8': 32, '40': 6},\n", + " 'username': 'ubuntu',\n", + " 'uploaded': '2023-08-09T14:56:51.365526',\n", + " 'modified': '2023-08-09T14:56:51.365533',\n", + " 'hash_structure': '913be2ca3a0e3c584cc728f4c359c850',\n", + " 'hash': '96290c6b21554ece4011f07e63de82d3',\n", + " 'derived': {'arrays_keys': ['forces', 'positions', 'numbers'],\n", + " 'info_keys': ['pbc', 'n_atoms', 'cell', 'formula', 'energy', 'volume'],\n", + " 'results_keys': [],\n", + " 'derived_keys': ['elements',\n", + " 'username',\n", + " 'uploaded',\n", + " 'modified',\n", + " 
'volume',\n", + " 'hash_structure',\n", + " 'hash']}}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[0]" + ] } ], "metadata": { From 426d80ba04865ec98af6069d9dc00968cad561cf Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 16 Aug 2023 16:05:45 +0000 Subject: [PATCH 017/112] Add function to delete documents by ID --- abcd/backends/atoms_opensearch.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index f21e9a65..b4916d8b 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -94,6 +94,10 @@ def save(self): body["derived"] = self.derived self._client.index(index=self._index_name, body=body) + def remove(self): + if self._id: + self._client.delete(index=self._index_name, id=self._id) + self.clear() class OpenSearchDatabase(AbstractABCD): """Wrapper to make database operations easy""" From c121c406f2063c964b60dc9f0ec335898e3cbadf Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 16 Aug 2023 17:17:23 +0000 Subject: [PATCH 018/112] Add function to update documents by ID --- abcd/backends/atoms_opensearch.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index b4916d8b..92f30a7e 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -6,7 +6,7 @@ from datetime import datetime from collections import Counter from operator import itemgetter - +from pathlib import Path import numpy as np @@ -19,8 +19,6 @@ from abcd.queryset import AbstractQuerySet from abcd.parsers import extras -from pathlib import Path - from opensearchpy import OpenSearch, helpers, AuthenticationException, ConnectionTimeout from luqum.parser import parser @@ -88,11 +86,15 @@ def _id(self): return self.get("_id", None) def save(self): + body = {} + body.update(self.data) + 
body["derived"] = self.derived if not self._id: - body = {} - body.update(self.data) - body["derived"] = self.derived self._client.index(index=self._index_name, body=body) + else: + body.pop('_id', None) + body = {"doc": body} + self._client.update(index=self._index_name, id=self._id, body=body) def remove(self): if self._id: From ad9a819bf8dbabad722dcb9194b2abc0f4a50d6f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 17 Aug 2023 10:13:51 +0000 Subject: [PATCH 019/112] Update extra info parsing for Lucene-like inputs Add option for colons when parsing key-value pairs for extra information, to allow formatting closer to Lucene queries --- abcd/parsers/extras.py | 2 +- tests/test_parsers.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/abcd/parsers/extras.py b/abcd/parsers/extras.py index af99fd01..08bae00f 100644 --- a/abcd/parsers/extras.py +++ b/abcd/parsers/extras.py @@ -6,7 +6,7 @@ start: ( key | key_value )* key: NAME - key_value: NAME "=" value + key_value: NAME ("="|":") value NAME: ("_"|LETTER|DIGIT) ("_"|"-"|LETTER|DIGIT)* diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 2a6383b2..a4a6f7ea 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -53,6 +53,7 @@ def test_string(self, parser, string, expected): ("true_value", {"true_value": True}), ("true_value_long = true", {"true_value_long": True}), ("false_value = F", {"false_value": False}), + ("false_value_colon: F", {"false_value_colon": False}), ], ) def test_boolean(self, parser, string, expected): @@ -66,6 +67,7 @@ def test_boolean(self, parser, string, expected): ("floating=1.1", {"floating": 1.1}), ("scientific_float=1.2e7", {"scientific_float": 1.2e7}), ("scientific_float_2=5e-6", {"scientific_float_2": 5e-6}), + ("floating_colon: 3.14", {"floating_colon": 3.14}), ], ) def test_numbers(self, parser, string, expected): @@ -87,6 +89,7 @@ def test_numbers(self, parser, string, expected): "array_bool_commas=[T, T, F, T]", 
{"array_bool_commas": [True, True, False, True]}, ), + ("int_array_colon: {4 2}", {"int_array_colon": [4, 2]}), ], ) def test_arrays(self, parser, string, expected): @@ -125,6 +128,17 @@ def test_composite(self, parser): out = parser.parse(composite_string) assert out == composite_expected + @pytest.mark.parametrize( + "string, expected", + [ + ('colon_string:"astring"', {'colon_string': 'astring'}), + ('colon_string_spaces : "astring"', {'colon_string_spaces': 'astring'}), + ], + ) + def test_colon_key_value_pairs(self, parser, string, expected): + """Key value pairs separated by colons""" + assert expected == parser.parse(string) + @pytest.mark.skip @pytest.mark.parametrize( "string", @@ -165,6 +179,12 @@ def test_operators(self, parser, string, expected): """Operators""" assert parser.parse(string) + + def test_colon_key_value_pairs(self, parser, string, expected): + """Key value pairs separated by colons""" + assert expected == parser.parse(string) + + @pytest.mark.parametrize( "string, expected", [ From a92ab9643361db15324ce8bff2004ac8985de67f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 18 Aug 2023 16:08:49 +0000 Subject: [PATCH 020/112] Add docstrings and type hints --- abcd/backends/atoms_opensearch.py | 522 +++++++++++++++++++++++++++--- 1 file changed, 472 insertions(+), 50 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 92f30a7e..8267deb7 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -1,6 +1,8 @@ -import types +from __future__ import annotations + import logging +from collections.abc import Generator from typing import Union, Iterable from os import linesep from datetime import datetime @@ -37,16 +39,60 @@ class OpenSearchQuery(AbstractQuerySet): - - def __init__(self, client, index_name, analyse_schema=True): - if analyse_schema: + """ + Class to parse and build queries for OpenSearch. 
+ + Attributes + ---------- + query_builder: ElasticsearchQueryBuilder + Query builder to convert a Tree into an OpenSearch query. + """ + def __init__(self, + client: Union[OpenSearch, None] = None, + index_name: Union[str, None] = None, + analyse_schema: bool = False + ): + """ + Initialises class. + + Parameters + ---------- + client: Union[OpenSearch, None] + OpenSearch client, used if analyse_schema is `True` to + characterise the schema. Default is `None`. + index_name: Union[str, None] + Name of OpenSearch index to be analysed, used if analyse_schema + is `True` to characterise the schema. Default is `None`. + analyse_schema: bool, optional + Whether to analyse the schema, as defined by the index_name and client. + Default is `False`. + """ + if analyse_schema and client is not None and index_name is not None: schema = client.indices.get_mapping()[index_name] schema_analizer = SchemaAnalyzer(schema) self.query_builder = ElasticsearchQueryBuilder(**schema_analizer.query_builder_options()) else: self.query_builder = ElasticsearchQueryBuilder() - def __call__(self, query): + def __call__( + self, + query: Union[dict, str, None] + ) -> Union[dict, None]: + """ + Parses and builds queries from strings using ElasticsearchQueryBuilder. + + Parameters + ---------- + query: Union[dict, str, None] + Query to be parsed for OpenSearch. If given as a dictionary, + the query is left unchanged. If given as a string, the + ElasticsearchQueryBuilder is used to build the query. + + Returns + ------- + Union[dict, None] + The parsed query for OpenSearch. + """ logger.info('parsed query: {}'.format(query)) if not query: @@ -61,21 +107,84 @@ def __call__(self, query): return query if query else None @staticmethod - def get_default_query(): + def get_default_query() -> dict: + """ + Defines a default OpenSearch query. Currently, matches all documents. + + Returns + ------- + The default query for OpenSearch. 
+ """ return { "match_all": {} } class AtomsModel(AbstractModel): - def __init__(self, client=None, index_name=None, dict=None): + """ + Class to interface between Atoms data and OpenSearch. + + Attributes + ---------- + _client: Union[OpenSearch, None] + OpenSearch client. + _index_name: Union[str, None] + OpenSearch index name. + """ + def __init__( + self, + client: Union[OpenSearch, None] = None, + index_name: Union[str, None] = None, + dict: Union[dict, None] = None + ): + """ + Initialises class. + + Parameters + ---------- + client: Union[OpenSearch, None] + OpenSearch client. + index_name: Union[str, None] + OpenSearch index name. + dict: dict + Dictionary of atoms data. + """ super().__init__(dict) self._client = client self._index_name = index_name @classmethod - def from_atoms(cls, client, index_name, atoms: Atoms, extra_info=None, store_calc=True): + def from_atoms( + cls, + client: OpenSearch, + index_name: str, + atoms: Atoms, + extra_info: Union[dict, None] = None, + store_calc: bool = True + ) -> AtomsModel: + """ + Reads and prepares atoms data and extra information for OpenSearch. + + Parameters + ---------- + client: OpenSearch + OpenSearch client. + index_name: str + OpenSearch index name. + atoms: Atoms + Atoms data to be stored. + extra_info: Union[dict, None], optional + Extra information to store in the document with the atoms data. + Default is `None`. + store_calc: bool, optional + Whether to store data from the calculator attached to atoms. + Default is `True`. + + Returns + ------- + Data from atoms and extra information to be saved in OpenSearch. + """ obj = super().from_atoms(atoms, extra_info, store_calc) obj._client = client obj._index_name = index_name @@ -83,38 +192,90 @@ def from_atoms(cls, client, index_name, atoms: Atoms, extra_info=None, store_cal @property def _id(self): + """ + Gets the OpenSearch document ID stored in data. + + Returns + ------- + Current document ID. 
+ """ return self.get("_id", None) def save(self): + """ + Saves data in OpenSearch. If the data being saved includes a document + ID, updates the matching document in OpenSearch with the current data. + """ body = {} body.update(self.data) body["derived"] = self.derived - if not self._id: - self._client.index(index=self._index_name, body=body) - else: - body.pop('_id', None) - body = {"doc": body} - self._client.update(index=self._index_name, id=self._id, body=body) + if self._client is not None: + if not self._id: + self._client.index(index=self._index_name, body=body) + else: + body.pop('_id', None) + body = {"doc": body} + self._client.update(index=self._index_name, id=self._id, body=body) def remove(self): - if self._id: + """ + If current data includes a document ID, deletes the matching document + from OpenSearch. + """ + if self._client is not None and self._id: self._client.delete(index=self._index_name, id=self._id) self.clear() + class OpenSearchDatabase(AbstractABCD): - """Wrapper to make database operations easy""" + """ + Wrapper to make OpenSearch operations easy. + + Attributes + ---------- + client: OpenSearch + OpenSearch client. + db: str + Database name. + index_name: str + OpenSearch index name. + parser: OpenSearchQuery + Query parser and builder for OpenSearch queries. + """ def __init__( - self, - host="localhost", - port=9200, - db="abcd", - index_name="atoms", - username="admin", - password="admin", - analyse_schema=True, - **kwargs): - + self, + host: str = "localhost", + port: int = 9200, + db: str = "abcd", + index_name: str = "atoms", + username: str = "admin", + password: str = "admin", + analyse_schema: bool = True, + **kwargs + ): + """ + Initialises class. + + Parameters + ---------- + host: str, optional + Name of OpenSearch host. Default is `localhost`. + port: int, optional + OpenSearch port. Default is `9200`. + db: str, optional + Label for OpenSearch database. Used only when printing information. + Default is `abcd`. 
+ index_name: str, optional + Name of OpenSearch index. Default is `atoms`. + username: str, optional + OpenSearch username. Default is `admin`. + password: str, optional + OpenSearch password. Default is `admin`. + analyse_schema: bool, optional + Whether to analyse the OpenSearch schema when building queries. + Default is `True`. + """ super().__init__() logger.info((host, port, index_name, username, password, kwargs)) @@ -145,6 +306,13 @@ def __init__( self.parser = OpenSearchQuery(self.client, self.index_name, analyse_schema) def info(self): + """ + Gets information from OpenSearch client about the database. + + Returns + ------- + Dictionary of database information. + """ host = self.client.transport.hosts[0]["host"] port = self.client.transport.hosts[0]["port"] @@ -158,7 +326,18 @@ def info(self): "type": "opensearch" } - def delete(self, query=None): + def delete( + self, + query: Union[dict, str, None] = None + ): + """ + Deletes documents from the database. + + Parameters + ---------- + query: Union[dict, str, None] + Query to filter documents to be deleted. Default is `None`. + """ query = self.parser(query) body = { "query": query @@ -170,16 +349,49 @@ def delete(self, query=None): ) def destroy(self): + """ + Deletes the current index in OpenSearch. + Ignores errors if the index does not exist. + """ self.client.indices.delete(index=self.index_name, ignore=404) def create(self): + """ + Creates a new index in OpenSearch. + Ignores errors if the index already exists. + """ self.client.indices.create(index=self.index_name, ignore=400) def save_bulk(self, actions: Iterable): + """ + Save a collection of documents in bulk. + + Parameters + ---------- + actions: Iterable + Documents to be saved. 
+ """ helpers.bulk(client=self.client, actions=actions, index=self.index_name) - def push(self, atoms: Union[Atoms, Iterable], extra_info=None, store_calc=True): - + def push( + self, + atoms: Union[Atoms, Iterable], + extra_info: Union[dict, None] = None, + store_calc: bool = True + ): + """ + Save data from atoms object(s) to database. + + Parameters + ---------- + atoms: Union[Atoms, Iterable] + extra_info: Union[dict, None], optional + Extra information to store in the document with the atoms data. + Default is `None`. + store_calc: bool, optional + Whether to store data from the calculator attached to atoms. + Default is `True`. + """ if extra_info and isinstance(extra_info, str): extra_info = extras.parser.parse(extra_info) @@ -188,7 +400,7 @@ def push(self, atoms: Union[Atoms, Iterable], extra_info=None, store_calc=True): data = AtomsModel.from_atoms(self.client, self.index_name, atoms, extra_info=extra_info, store_calc=store_calc) data.save() - elif isinstance(atoms, types.GeneratorType) or isinstance(atoms, list): + elif isinstance(atoms, Generator) or isinstance(atoms, list): actions = [] for item in atoms: @@ -197,7 +409,26 @@ def push(self, atoms: Union[Atoms, Iterable], extra_info=None, store_calc=True): actions[-1]["derived"] = data.derived self.save_bulk(actions) - def upload(self, file: Path, extra_infos=None, store_calc=True): + def upload( + self, + file: Path, + extra_infos: Union[Iterable, dict, None] = None, + store_calc: bool = True + ): + """ + Upload data from a file to the database. + + Parameters + ---------- + file: Path + Path to file to be uploaded + extra_infos: Union[Iterable, dict, None], optional + Extra information to store in the document with the atoms data. + Default is `None`. + store_calc: bool, optional + Whether to store data from the calculator attached to atoms. + Default is `True`. 
+ """ if isinstance(file, str): file = Path(file) @@ -212,7 +443,22 @@ def upload(self, file: Path, extra_infos=None, store_calc=True): data = iread(str(file)) self.push(data, extra_info, store_calc=store_calc) - def get_items(self, query=None): + def get_items( + self, + query: Union[dict, str, None] = None + ) -> Generator[dict, None, None]: + """ + Get data as a dictionary from documents in the database. + + Parameters + ---------- + query: Union[dict, str, None] + Query to filter documents to get data from. Default is `None`. + + Returns + ------- + Generator for dictionary of data. + """ query = self.parser(query) query = { "query": query, @@ -225,7 +471,22 @@ def get_items(self, query=None): ): yield {'_id': hit['_id'], **hit['_source']} - def get_atoms(self, query=None): + def get_atoms( + self, + query: Union[dict, str, None] = None + ) -> Generator[Atoms, None, None]: + """ + Get data as Atoms object from documents in the database. + + Parameters + ---------- + query: Union[dict, str, None] + Query to filter documents to get data from. Default is `None`. + + Returns + ------- + Generator for AtomsModel object of data. + """ query = self.parser(query) query = { "query": query, @@ -238,7 +499,22 @@ def get_atoms(self, query=None): ): yield AtomsModel(None, None, hit["_source"]).to_ase() - def count(self, query=None): + def count( + self, + query: Union[dict, str, None] = None + ) -> int: + """ + Counts number of documents in the database. + + Parameters + ---------- + query: Union[dict, str, None] + Query to filter documents to be counted. Default is `None`. + + Returns + ------- + Count of number of documents. + """ logger.info("query; {}".format(query)) query = self.parser(query) body = { @@ -247,8 +523,21 @@ def count(self, query=None): return self.client.count(index=self.index_name, body=body)["count"] - # Slow - use count_property where possible! 
- def property(self, name, query=None): + def property(self, name, query: Union[dict, str, None] = None) -> list: + """ + Gets all values of a specified property for matching documents in the + database. This method is very slow, so it is preferable to use + alternative methods where possible, such as count_property. + + Parameters + ---------- + query: Union[dict, str, None] + Query to filter documents to get properties from. Default is `None`. + + Returns + ------- + List of values for the specified property for all matching documents. + """ query = self.parser(query) query = { "query": query, @@ -262,7 +551,22 @@ def property(self, name, query=None): _source=format(name), )] - def count_property(self, name, query=None): + def count_property(self, name, query: Union[dict, str, None] = None) -> dict: + """ + Counts values of a specified property for matching documents in the + database. This method much faster than performing a Count on the list + returned by self.property, so this method should be used preferentially. + + Parameters + ---------- + query: Union[dict, str, None] + Query to filter documents to count properties from. Default is `None`. + + Returns + ------- + Dictionary of values and counts for the specified property for all + matching documents. + """ query = self.parser(query) body = { @@ -288,12 +592,29 @@ def count_property(self, name, query=None): return prop - def properties(self, query=None): + def properties(self, query: Union[dict, str, None] = None) -> dict: + """ + Gets lists of all properties from matching documents, separated into + info, derived, and array properties. + + Parameters + ---------- + query: Union[dict, str, None] + Query to filter documents to get properties from. Default is `None`. + + Returns + ------- + Dictionary of properties, with keys corresponding to info, derived, + and arrays of properties, and values corresponding to a list of + the properties of that type. 
+ """ query = self.parser(query) properties = {} - for prop in self.client.indices.get_mapping(self.index_name)[self.index_name]["mappings"]["properties"].keys(): + for prop in self.client.indices.get_mapping( + index=self.index_name + )[self.index_name]["mappings"]["properties"].keys(): body = { "size" : 0, @@ -340,7 +661,22 @@ def properties(self, query=None): return properties - def get_type_of_property(self, prop, category): + def get_type_of_property(self, prop: str, category: str) -> str: + """ + Gets type of a property, given its category. + + Parameters + ---------- + prop: str + Name of the property. + catagory: str + Name of property's category. Current options are `info`, `derived`, + and `arrays`. + + Returns + ------- + Type of the property. + """ # TODO: Probably it would be nicer to store the type info in the database from the beginning. atoms = self.client.search( index=self.index_name, @@ -373,13 +709,27 @@ def get_type_of_property(self, prop, category): else: return "scalar({})".format(map_types[type(data)]) - def count_properties(self, query=None): - query = self.parser(query) + def count_properties(self, query: Union[dict, str, None] = None) -> dict: + """ + Counts all properties from matching documents. + Parameters + ---------- + query: Union[dict, str, None] + Query to filter documents to count properties from. Default is `None`. + + Returns + ------- + Dictionary of properties, with keys property names, and values + corresponding to their counts, categories and data types. 
+ """ + query = self.parser(query) properties = {} try: - keys = self.client.indices.get_mapping(self.index_name)[self.index_name]["mappings"]["properties"].keys() + keys = self.client.indices.get_mapping( + index=self.index_name + )[self.index_name]["mappings"]["properties"].keys() except KeyError: return properties @@ -430,7 +780,17 @@ def count_properties(self, query=None): return properties - def add_property(self, data, query=None): + def add_property(self, data: dict, query: Union[dict, str, None] = None): + """ + Adds properties to matching documents. + + Parameters + ---------- + data: dict + Property key-value pairs to be added to matching documents. + query: Union[dict, str, None] + Query to filter documents to add properties to. Default is `None`. + """ logger.info('add: data={}, query={}'.format(data, query)) query = self.parser(query) @@ -454,7 +814,24 @@ def add_property(self, data, query=None): body=body, ) - def rename_property(self, name, new_name, query=None): + def rename_property( + self, + name: str, + new_name: str, + query: Union[dict, str, None] = None + ): + """ + Renames property for all matching documents. + + Parameters + ---------- + name: str + Current name of property to be renamed. + new_name: str + New name of property to be renamed. + query: Union[dict, str, None] + Query to filter documents to rename property. Default is `None`. + """ logger.info('rename: query={}, old={}, new={}'.format(query, name, new_name)) query = self.parser(query) @@ -482,7 +859,17 @@ def rename_property(self, name, new_name, query=None): body=body ) - def delete_property(self, name, query=None): + def delete_property(self, name: str, query: Union[dict, str, None] = None): + """ + Deletes property from all matching documents. + + Parameters + ---------- + name: str + Name of property to be deleted from documents. + query: Union[dict, str, None] + Query to filter documents to have property deleted. Default is `None`. 
+ """ logger.info('delete: query={}, porperty={}'.format(name, query)) query = self.parser(query) @@ -508,13 +895,41 @@ def delete_property(self, name, query=None): body=body ) - def hist(self, name, query=None, **kwargs): + def hist( + self, + name: str, + query: Union[dict, str, None] = None, + **kwargs + ) -> Union[dict, None]: + """ + Calculate histogram statistics for a property from all matching documents. + + Parameters + ---------- + name: str + Name of property. + query: Union[dict, str, None] + Query to filter documents. Default is `None`. + + Returns + ------- + Dictionary containing histogram statistics, including the number of + bins, edges, counts, min, max, and standard deviation. + """ query = self.parser(query) data = self.property(name, query) return histogram(name, data, **kwargs) def __repr__(self): + """ + OpenSearch class representation. + + Returns + ------- + String for OpenSearch class representation, containing the connected + database host, port, and index name. + """ host = self.client.transport.hosts[0]["host"] port = self.client.transport.hosts[0]["port"] @@ -523,12 +938,19 @@ def __repr__(self): "index={}) ".format(self.index_name) def _repr_html_(self): - """Jupyter notebook representation""" + """ + Jupyter notebook representation of OpenSearch class. + + Returns + ------- + String for HTML representation. + """ return "ABCD OpenSearch database" def print_info(self): - """shows basic information about the connected database""" - + """ + Show basic information about the connected OpenSearch database. 
+ """ out = linesep.join(["{:=^50}".format(" ABCD OpenSearch "), "{:>10}: {}".format("type", "opensearch"), linesep.join("{:>10}: {}".format(k, v) for k, v in self.info().items())]) From 43629e535ddf0416a270c70a841de2a1484391d7 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 21 Aug 2023 10:25:30 +0000 Subject: [PATCH 021/112] Tidy code Removes Pylance type errors due to incorrect type hints from lark parser --- abcd/backends/atoms_opensearch.py | 40 +++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 8267deb7..c8abac89 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -313,8 +313,11 @@ def info(self): ------- Dictionary of database information. """ - host = self.client.transport.hosts[0]["host"] - port = self.client.transport.hosts[0]["port"] + if self.client.transport.hosts is not None: + host = self.client.transport.hosts[0]["host"] + port = self.client.transport.hosts[0]["port"] + else: + host, port = None, None self.client.indices.refresh(index=self.index_name) return { @@ -376,7 +379,7 @@ def save_bulk(self, actions: Iterable): def push( self, atoms: Union[Atoms, Iterable], - extra_info: Union[dict, None] = None, + extra_info: Union[dict, str, None] = None, store_calc: bool = True ): """ @@ -385,7 +388,7 @@ def push( Parameters ---------- atoms: Union[Atoms, Iterable] - extra_info: Union[dict, None], optional + extra_info: Union[dict, str, None], optional Extra information to store in the document with the atoms data. Default is `None`. store_calc: bool, optional @@ -393,18 +396,28 @@ def push( Default is `True`. """ if extra_info and isinstance(extra_info, str): - extra_info = extras.parser.parse(extra_info) + extra_info = extras.parser.parse(extra_info) # type: ignore - # Could combine into single data.save, but keep separate for option of bulk insertion? 
if isinstance(atoms, Atoms): - data = AtomsModel.from_atoms(self.client, self.index_name, atoms, extra_info=extra_info, store_calc=store_calc) + data = AtomsModel.from_atoms( + self.client, + self.index_name, + atoms, + extra_info=extra_info, # type: ignore + store_calc=store_calc + ) data.save() elif isinstance(atoms, Generator) or isinstance(atoms, list): - actions = [] for item in atoms: - data = AtomsModel.from_atoms(self.client, self.index_name, item, extra_info=extra_info, store_calc=store_calc) + data = AtomsModel.from_atoms( + self.client, + self.index_name, + item, + extra_info=extra_info, # type: ignore + store_calc=store_calc + ) actions.append(data.data) actions[-1]["derived"] = data.derived self.save_bulk(actions) @@ -436,7 +449,7 @@ def upload( extra_info = {} if extra_infos: for info in extra_infos: - extra_info.update(extras.parser.parse(info)) + extra_info.update(extras.parser.parse(info)) # type: ignore extra_info["filename"] = str(file) @@ -930,8 +943,11 @@ def __repr__(self): String for OpenSearch class representation, containing the connected database host, port, and index name. """ - host = self.client.transport.hosts[0]["host"] - port = self.client.transport.hosts[0]["port"] + if self.client.transport.hosts is not None: + host = self.client.transport.hosts[0]["host"] + port = self.client.transport.hosts[0]["port"] + else: + host, port = None, None return "{}(".format(self.__class__.__name__) + \ "url={}:{}, ".format(host, port) + \ From 4c0f097f106a456982812da6b876fc339465656f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 24 Aug 2023 15:14:59 +0000 Subject: [PATCH 022/112] Fix formatting for non-string value error --- abcd/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abcd/model.py b/abcd/model.py index 15a1ece3..816cee6c 100644 --- a/abcd/model.py +++ b/abcd/model.py @@ -43,7 +43,7 @@ def update(self, value): else: raise ValueError( - "The {} type cannot be hashed! 
(Value: {})", format(type(value), value) + f"The {type(value)} type cannot be hashed! (Value: {value})" ) def __call__(self): From 0c0561a72fac98b041dfb30eaa00e6fba7aead41 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 24 Aug 2023 15:16:09 +0000 Subject: [PATCH 023/112] Add class to read csv data --- abcd/backends/atoms_properties.py | 160 ++++++++++++++++++++++++++++++ pyproject.toml | 1 + 2 files changed, 161 insertions(+) create mode 100644 abcd/backends/atoms_properties.py diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py new file mode 100644 index 00000000..33ba1d5d --- /dev/null +++ b/abcd/backends/atoms_properties.py @@ -0,0 +1,160 @@ +from __future__ import annotations +import pandas as pd +import numpy as np +from typing import Union +from pathlib import Path + +class Properties(): + """ + Wrapper to identify and manipulate properties to be passed + as extra_info to the database. + + Attributes + ---------- + csv_file: Union[str, Path] + Name or path to csv file containing properties. + store_struct_file: bool + Whether to construct a filename for each structure. + struct_file_template: str + Template string for path to files containing structure. + struct_name_label: str + Field name in csv file containing values for `struct_name`. + df: pd.Dataframe + Dataframe containing loaded property data from csv file. + units: Union[dict, None], optional + Units. + """ + def __init__( + self, + csv_file: Union[str, Path], + store_struct_file: bool = False, + struct_file_template: Union[str, None] = None, + struct_name_label: Union[str, None] = None, + units: Union[dict, None] = None + ): + """ + Initialises class. + + Parameters + ---------- + csv_file: Union[str, Path] + Path or filename of csv file containing properties to be loaded. + store_struct_file: bool, optional + If true, use struct_file_template and struct_name_label to + construct filename for each structure. Default is `False`. 
+ struct_file_template: Union[str, None], optiona + Template string for path to files containing structure. + Required only if store_struct_file is True. + Template must contain `{struct_name}`, to ensure a unique file + for each structure. Default is `None`. + struct_name_label: Union[str, None], optional + Field name in csv file containing values for `struct_name`. + Required only if store_struct_file is True. Default is `None`. + units: Union[dict, None], optional + Units for fields in csv file. If unspecified, _separate_units() + is used to identify units in field names. Default is `None`. + """ + self.csv_file = csv_file + self.store_struct_file = store_struct_file + if self.store_struct_file: + if struct_file_template is not None: + self.struct_file_template = struct_file_template + else: + raise ValueError(( + "`struct_file_template` must be specified if " + "store_struct_file is True." + )) + if struct_name_label is not None: + self.struct_name_label = struct_name_label + else: + raise ValueError( + "`struct_name_label` must be specified if store_struct_file is True." + ) + + self.df = pd.read_csv(self.csv_file) + self.df.replace({np.nan: None}, inplace=True) + + if units is not None: + for key in units.keys(): + if key not in self.df.columns.values: + raise ValueError(( + f"Invalid field name: {key}. Keys in `units` must " + f"correspond to field names in the loaded data." + )) + self.units = units + else: + self._separate_units() + + def _separate_units(self): + """ + Parse field names to determine units. 
+ """ + columns = [] + self.units = {} + for column in list(self.df.columns.values): + if "," in column: + column_name = column.split(",")[0].strip() + self.units[column_name] = column.split(",")[1].strip() + elif "(" in column: + column_name = column.split("(")[0].strip() + self.units[column_name] = column.split("(")[1].strip()[:-1] + else: + column_name = column + + columns.append(column) + + self.df.columns = columns + + def get_struct_file(self, struct_name: str) -> str: + """ + Evaluate struct_file_template to determine structure filename + for current structure. + + Parameters + ---------- + struct_name: str + Name of current structure. + + Returns + ------- + Filename for the current structure. + """ + if struct_name is None: + raise ValueError("`struct_name` must be specified") + if "{struct_name}" not in self.struct_file_template: + raise ValueError(( + f"'struct_name' must be a variable in the template file: " + f"{self.struct_file_template}" + )) + else: + return eval(f"f'{self.struct_file_template}'") + + def to_list(self) -> list[dict]: + """ + Convert dataframe into list of properties for each structure. + + Returns + ------- + List of property dictionaries for each structure in the dataframe. + """ + properties_list = [] + self.struct_files = [] + for i in range(len(self.df)): + properties = self.df.iloc[i].to_dict() + properties["units"] = self.units + + if self.store_struct_file: + try: + struct_name = self.df.iloc[i][self.struct_name_label] + except KeyError as e: + raise ValueError(( + f"{self.struct_name_label} is not a valid column in " + f"the data loaded." 
+ )) + struct_file = self.get_struct_file(struct_name) + properties["struct_file"] = struct_file + + properties_list.append( + {key: value for key, value in properties.items() if value is not None} + ) + return properties_list \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 0b34115a..60981918 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ matplotlib = "^3.9" notebook = "^7.2" numpy = "^1.26" opensearch-py = "^2.2.0" +pandas = "^2.2" pymongo = "^4.7.3" python = "^3.9" tqdm = "^4.66" From 29d9a670a00722a73576c31fbec22db3e620a5f3 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 24 Aug 2023 15:28:27 +0000 Subject: [PATCH 024/112] Enable spacegroups to be stored from Atoms --- abcd/model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/abcd/model.py b/abcd/model.py index 816cee6c..7ce1f828 100644 --- a/abcd/model.py +++ b/abcd/model.py @@ -6,6 +6,7 @@ from ase import Atoms from ase.calculators.singlepoint import SinglePointCalculator +from ase.spacegroup.spacegroup import Spacegroup import numpy as np logger = logging.getLogger(__name__) @@ -194,6 +195,8 @@ def from_atoms(cls, atoms: Atoms, extra_info=None, store_calc=True): for key, value in atoms.info.items(): if isinstance(value, np.ndarray): data[key] = value.tolist() + elif isinstance(value, Spacegroup): + data[key] = value.todict() else: data[key] = value From d364e62a0bb28b69332ca4dc146bb35b9ae276cf Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 24 Aug 2023 16:30:54 +0000 Subject: [PATCH 025/112] Move structure files to list and make units optional --- abcd/backends/atoms_properties.py | 74 ++++++++++++++++++------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py index 33ba1d5d..791e0003 100644 --- a/abcd/backends/atoms_properties.py +++ b/abcd/backends/atoms_properties.py @@ -23,6 +23,8 @@ class Properties(): Dataframe containing loaded property data 
from csv file. units: Union[dict, None], optional Units. + struct_files: list[str] + List containing a filename for each structure in the dataframe. """ def __init__( self, @@ -30,7 +32,8 @@ def __init__( store_struct_file: bool = False, struct_file_template: Union[str, None] = None, struct_name_label: Union[str, None] = None, - units: Union[dict, None] = None + units: Union[dict, None] = None, + infer_units: bool = False ): """ Initialises class. @@ -53,8 +56,27 @@ def __init__( units: Union[dict, None], optional Units for fields in csv file. If unspecified, _separate_units() is used to identify units in field names. Default is `None`. + infer_units: bool, optional + Whether to attempt to infer units from field names in the + dataframe. Unused if units is not `None`. Default is `False`. """ self.csv_file = csv_file + self.df = pd.read_csv(self.csv_file) + self.df.replace({np.nan: None}, inplace=True) + + if units is not None: + for key in units.keys(): + if key not in self.df.columns.values: + raise ValueError(( + f"Invalid field name: {key}. Keys in `units` must " + f"correspond to field names in the loaded data." + )) + self.units = units + elif infer_units: + self._separate_units() + else: + self.units = None + self.store_struct_file = store_struct_file if self.store_struct_file: if struct_file_template is not None: @@ -70,20 +92,7 @@ def __init__( raise ValueError( "`struct_name_label` must be specified if store_struct_file is True." ) - - self.df = pd.read_csv(self.csv_file) - self.df.replace({np.nan: None}, inplace=True) - - if units is not None: - for key in units.keys(): - if key not in self.df.columns.values: - raise ValueError(( - f"Invalid field name: {key}. Keys in `units` must " - f"correspond to field names in the loaded data." 
- )) - self.units = units - else: - self._separate_units() + self.set_struct_files() def _separate_units(self): """ @@ -101,10 +110,27 @@ def _separate_units(self): else: column_name = column - columns.append(column) + columns.append(column_name) self.df.columns = columns + def set_struct_files(self): + """ + Sets a list containing a filename for each structure in the dataframe. + """ + self.struct_files = [] + + for i in range(len(self.df)): + try: + struct_name = self.df.iloc[i][self.struct_name_label] + except KeyError as e: + raise ValueError(( + f"{self.struct_name_label} is not a valid column in " + f"the data loaded." + )) + struct_file = self.get_struct_file(struct_name) + self.struct_files.append(struct_file) + def get_struct_file(self, struct_name: str) -> str: """ Evaluate struct_file_template to determine structure filename @@ -138,22 +164,10 @@ def to_list(self) -> list[dict]: List of property dictionaries for each structure in the dataframe. """ properties_list = [] - self.struct_files = [] for i in range(len(self.df)): properties = self.df.iloc[i].to_dict() - properties["units"] = self.units - - if self.store_struct_file: - try: - struct_name = self.df.iloc[i][self.struct_name_label] - except KeyError as e: - raise ValueError(( - f"{self.struct_name_label} is not a valid column in " - f"the data loaded." 
- )) - struct_file = self.get_struct_file(struct_name) - properties["struct_file"] = struct_file - + if self.units is not None: + properties["units"] = self.units properties_list.append( {key: value for key, value in properties.items() if value is not None} ) From e916b6b6dc206ca93e3ba049e36c29cca5838aa7 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 24 Aug 2023 17:10:54 +0000 Subject: [PATCH 026/112] Update README for OpenSearch --- README.md | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a56a44f8..7fbdc673 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Main features: - Configurations that consist of atom positions, elements, forces, and various metadata are stored as a dictionary by a MongoDB backend. - There is no predefined schema, any combination of keys are allowed for all configurations. - Two modes: "discovery" and "download". Both use filter-type queries, but in "discovery" mode, summary statistics of the configurations that pass the filter are reported. In "download" mode, the matching configurations are downloaded and exported to a file. -- The "discovery" mode can be used to learn what keys exist in the set of configurations that have passed the current quiery filter. The user can use this to refine the query. +- The "discovery" mode can be used to learn what keys exist in the set of configurations that have passed the current query filter. The user can use this to refine the query. - Complex queries on dictionary key-value pairs are allowed, and their logical combinations. ## Installation @@ -24,7 +24,9 @@ $ pip install git+https://github.com/libAtoms/abcd.git ## Setup -If you have an already running mongo server, or install your own, they you are ready to go. Alternatively, +### MongoDB + +If you have an already running MongoDB server, or install your own, then you are ready to go. 
Alternatively, ``` docker run -d --rm --name abcd-mongodb -v :/data/db -p 27017:27017 mongo @@ -49,7 +51,22 @@ The above login command will place create an `~/.abcd` file with the following c {"url": "mongodb://localhost"} ``` -# Remote access +### OpenSearch +If you have an already running OpenSearch server, or install your own, then you are ready to go. Alternatively, + +``` +sudo sysctl -w vm.max_map_count=262144 +docker run -d --rm --name abcd-opensearch -v :/data/db -p 9200:9200 -it opensearchproject/opensearch:latest +``` + +will download and install a docker and run a database in it. + +To connect to an OpenSearch database that is already running, use +``` +abcd login opensearch://username:password@localhost +``` + +## Remote access You can set up an `abcd` user on your machine where the database is running, and then access it remotely for discovering data. Make sure you have the `~/.abcd` file created for this user, then put this in the `.ssh/authorized_keys` file (substituting your public key for the last part): ``` @@ -61,12 +78,12 @@ Then you'll be able to access the database remotely using, e.g. ssh abcd@your.machine summary ``` -# GUI through a browser + visualisation +## GUI through a browser + visualisation The database has a simple GUI, coupled with a visualiser. Data for now needs to be uploaded on the command line, but query can be done through the browsers. Instructions below (they include running `abcd` from a docker too, but of course you can run it outside the docker as well. ) -#### Usage in docker +## Usage in docker Currently a manual uploaded image is available, that was built on 7/2/2020 by Tamas K. Stenczel. To access it: 1. 
pull the image From 801f87dcc49f68ed55e12313765c9de504dc3673 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 25 Aug 2023 09:32:41 +0000 Subject: [PATCH 027/112] Update assertions for database tests --- tests/database.py | 191 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 tests/database.py diff --git a/tests/database.py b/tests/database.py new file mode 100644 index 00000000..a4184846 --- /dev/null +++ b/tests/database.py @@ -0,0 +1,191 @@ +import unittest +import mongomock +from openmock import openmock + +from abcd import ABCD +import logging + + +class Mongo(unittest.TestCase): + @classmethod + @mongomock.patch(servers=(("localhost", 27017),)) + def setUpClass(cls): + logging.basicConfig(level=logging.INFO) + url = "mongodb://localhost" + abcd = ABCD.from_url(url) + abcd.print_info() + + cls.abcd = abcd + + @classmethod + def tearDownClass(cls): + cls.abcd.destroy() + + def test_thing(self): + print(self.abcd.info()) + + def test_push(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + + self.abcd.destroy() + self.abcd.push(atoms) + new = list(self.abcd.get_atoms())[0] + + assert atoms == new + self.abcd.destroy() + + +class OpenSearch(unittest.TestCase): + @classmethod + @openmock + def setUpClass(cls): + from abcd.backends.atoms_opensearch import OpenSearchDatabase + + logging.basicConfig(level=logging.INFO) + url = "opensearch://admin:admin@localhost:9200" + abcd = ABCD.from_url(url, index_name="test_index", analyse_schema=False) + assert isinstance(abcd, OpenSearchDatabase) + cls.abcd = abcd + + @classmethod + def tearDownClass(cls): + cls.abcd.destroy() + + def 
test_destroy(self): + self.assertTrue(self.abcd.client.indices.exists("test_index")) + self.abcd.destroy() + self.assertFalse(self.abcd.client.indices.exists("test_index")) + return + + def test_create(self): + self.abcd.destroy() + self.abcd.create() + self.assertTrue(self.abcd.client.indices.exists("test_index")) + self.assertFalse(self.abcd.client.indices.exists("fake_index")) + + def test_push(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + from abcd.backends.atoms_opensearch import AtomsModel + + self.abcd.destroy() + self.abcd.create() + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + self.abcd.push(atoms_1) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + W 0.00000000 0.00000000 0.00000000 + W 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + + result = AtomsModel( + None, + None, + self.abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + ).to_ase() + assert atoms_1 == result + assert atoms_2 != result + + def test_bulk(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + from abcd.backends.atoms_opensearch import AtomsModel + + self.abcd.destroy() + self.abcd.create() + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + + xyz_2 = StringIO( + """1 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t 
e s t _ s t r" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + + atoms_list = [] + atoms_list.append(atoms_1) + atoms_list.append(atoms_2) + self.abcd.push(atoms_list) + assert self.abcd.count() == 2 + + result_1 = AtomsModel( + None, + None, + self.abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + ).to_ase() + result_2 = AtomsModel( + None, + None, + self.abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], + ).to_ase() + assert atoms_1 == result_1 + assert atoms_2 == result_2 + + def test_count(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + + self.abcd.destroy() + self.abcd.create() + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + self.abcd.push(atoms) + self.abcd.push(atoms) + assert self.abcd.count() == 2 + + +if __name__ == "__main__": + unittest.main(verbosity=1, exit=False) From 9009ded9495090d838b5a325e5fe7afd50edc30d Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 25 Aug 2023 11:18:35 +0000 Subject: [PATCH 028/112] Add tests for csv reader --- tests/examples.csv | 4 ++ tests/properties.py | 163 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100755 tests/examples.csv create mode 100644 tests/properties.py diff --git a/tests/examples.csv b/tests/examples.csv new file mode 100755 index 00000000..6473a822 --- /dev/null +++ b/tests/examples.csv @@ -0,0 +1,4 @@ +Text,Integers,Floating,Boolean,Missing data,"Comma units, m",Bracket units (s) +Some,1,0.01,TRUE,Missing,0,0 +test,2,0.1,FALSE,,1,1 +data,3,1,FALSE,data,2,2 diff --git a/tests/properties.py 
b/tests/properties.py new file mode 100644 index 00000000..d7ae9508 --- /dev/null +++ b/tests/properties.py @@ -0,0 +1,163 @@ +import unittest +from abcd.backends.atoms_properties import Properties + +class PropertiesTests(unittest.TestCase): + """Testing properties csv reader""" + + @classmethod + def setUpClass(cls): + """ + Load example csv file. + """ + import os + class_path = os.path.normpath(os.path.abspath(__file__)) + csv_file = os.path.dirname(class_path) + "/examples.csv" + cls.property = Properties(csv_file) + + def test_dataframe(self): + """ + Test data correctly stored in pandas DataFrame. + """ + from pandas import DataFrame + assert isinstance(self.property.df, DataFrame) + assert len(self.property.df) == 3 + + def test_specify_units(self): + """ + Test units can be specified manually, if they match existing fields. + """ + input_units_1 = {"Integers": "items", "Floating": "seconds"} + properties_1 = Properties( + csv_file=self.property.csv_file, + units=input_units_1, + ) + self.assertEqual(properties_1.units, input_units_1) + + input_units_2 = {"Fake": "m"} + with self.assertRaises(ValueError): + properties_1 = Properties( + csv_file=self.property.csv_file, + units=input_units_2, + ) + + def test_infer_units(self): + """ + Test units can be inferred from field names. + """ + properties = Properties( + csv_file=self.property.csv_file, + infer_units=True, + ) + expected_units = { + "Comma units": "m", + "Bracket units": "s" + } + expected_fields = [ + "Text", + "Integers", + "Floating", + "Boolean", + "Missing data", + "Comma units", + "Bracket units" + ] + self.assertEqual(properties.units, expected_units) + self.assertEqual(list(properties.df.columns.values), expected_fields) + + def test_struct_file(self): + """ + Test structure file names can be inferred from a field. 
+ """ + struct_file_template = "test_{struct_name}_file.txt" + struct_name_label = "Text" + properties_1 = Properties( + csv_file=self.property.csv_file, + store_struct_file=True, + struct_file_template=struct_file_template, + struct_name_label=struct_name_label + ) + expected_struct_files = [ + "test_Some_file.txt", + "test_test_file.txt", + "test_data_file.txt" + ] + self.assertIsInstance(properties_1.struct_files, list) + for i, file in enumerate(expected_struct_files): + self.assertEqual(properties_1.struct_files[i], file) + + invalid_template = "invalid_template" + with self.assertRaises(ValueError): + Properties( + csv_file=self.property.csv_file, + store_struct_file=True, + struct_file_template=invalid_template, + struct_name_label=struct_name_label + ) + + invalid_label = "label" + with self.assertRaises(ValueError): + Properties( + csv_file=self.property.csv_file, + store_struct_file=True, + struct_file_template=struct_file_template, + struct_name_label=invalid_label + ) + + def test_to_list(self): + """ + Test dataframe can be converted into a list of properties. + """ + self.assertEqual(len(self.property.to_list()), 3) + self.assertIsInstance(self.property.to_list(), list) + self.assertIsInstance(self.property.to_list()[0], dict) + expected_property = { + "Text": "Some", + "Integers": 1, + "Floating": 0.01, + "Boolean": True, + "Missing data": "Missing", + "Comma units, m": 0, + "Bracket units (s)": 0 + } + self.assertEqual(self.property.to_list()[0], expected_property) + + def test_missing_data(self): + """ + Test missing data is not included in properties. + """ + expected_property = { + "Text": "test", + "Integers": 2, + "Floating": 0.1, + "Boolean": False, + "Comma units, m": 1, + "Bracket units (s)": 1 + } + self.assertEqual(self.property.to_list()[1], expected_property) + + def test_to_list_units(self): + """ + Test units are included in properties when converting to a list. 
+ """ + properties_1 = Properties( + csv_file=self.property.csv_file, + infer_units=True, + ) + expected_units = { + "Comma units": "m", + "Bracket units": "s" + } + expected_property = { + "Text": "Some", + "Integers": 1, + "Floating": 0.01, + "Boolean": True, + "Missing data": "Missing", + "Comma units": 0, + "Bracket units": 0, + "units": expected_units + } + self.assertEqual(properties_1.to_list()[0], expected_property) + +if __name__ == '__main__': + unittest.main() From 74fb8357529d4250e2d0b753d39f2200433a5253 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 25 Aug 2023 14:56:37 +0000 Subject: [PATCH 029/112] Add flake8 and black --- .flake8 | 16 ++++++++++++++++ pyproject.toml | 2 ++ 2 files changed, 18 insertions(+) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..7db48f62 --- /dev/null +++ b/.flake8 @@ -0,0 +1,16 @@ +[flake8] +select = B,BLK,C,E,F,I,S,W +max-complexity = 30 +max-line-length = 88 +ignore = E203,W503 +application-import-names = abcd,tests +import-order-style = google +exclude = + abcd/backends/atoms_pymongo.py, + abcd/frontends, + abcd/model.py, + abcd/parsers/queries_new.py, + abcd/parsers/queries.py, + abcd/parsers/extras.py, + abcd/server, + tests/__init__.py diff --git a/pyproject.toml b/pyproject.toml index 60981918..15d2d7d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,8 @@ python = "^3.9" tqdm = "^4.66" [tool.poetry.group.dev.dependencies] +black = "^22.3.0" +flake8 = "^3.7.9" mongomock = "^4.1.2" openmock = "^2.2" pytest = "^8.2.2" From c67d0041d5bcca7039f0ac199010bc0c2c32209f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 25 Aug 2023 15:14:10 +0000 Subject: [PATCH 030/112] Apply black formatting --- abcd/backends/atoms_opensearch.py | 349 ++++++++++++++---------------- abcd/backends/atoms_properties.py | 51 +++-- abcd/parsers/extras.py | 5 +- abcd/server/app/views/database.py | 10 +- tests/database.py | 5 + tests/properties.py | 34 ++- 
tests/test_parsers.py | 6 - 7 files changed, 217 insertions(+), 243 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index c8abac89..36d4528a 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -34,7 +34,7 @@ int: "int", str: "str", datetime: "date", - dict: "dict" + dict: "dict", } @@ -47,12 +47,14 @@ class OpenSearchQuery(AbstractQuerySet): query_builder: ElasticsearchQueryBuilder Query builder to convert a Tree in an OpenSearch query. """ - def __init__(self, - client: Union[OpenSearch, None] = None, - index_name: Union[str, None] = None, - analyse_schema: bool = False + + def __init__( + self, + client: Union[OpenSearch, None] = None, + index_name: Union[str, None] = None, + analyse_schema: bool = False, ): - """" + """ " Initialises class. Parameters @@ -70,14 +72,13 @@ def __init__(self, if analyse_schema and client is not None and index_name is not None: schema = client.indices.get_mapping()[index_name] schema_analizer = SchemaAnalyzer(schema) - self.query_builder = ElasticsearchQueryBuilder(**schema_analizer.query_builder_options()) + self.query_builder = ElasticsearchQueryBuilder( + **schema_analizer.query_builder_options() + ) else: self.query_builder = ElasticsearchQueryBuilder() - def __call__( - self, - query: Union[dict, str, None] - ) -> Union[dict, None]: + def __call__(self, query: Union[dict, str, None]) -> Union[dict, None]: """ Parses and builds queries from strings using ElasticsearchQueryBuilder. @@ -93,10 +94,10 @@ def __call__( Union[dict, None] The parsed query for OpenSearch. """ - logger.info('parsed query: {}'.format(query)) + logger.info("parsed query: {}".format(query)) if not query: - query=self.get_default_query() + query = self.get_default_query() if isinstance(query, dict): return query @@ -115,9 +116,7 @@ def get_default_query() -> dict: ------- The default query for OpenSearch. 
""" - return { - "match_all": {} - } + return {"match_all": {}} class AtomsModel(AbstractModel): @@ -131,11 +130,12 @@ class AtomsModel(AbstractModel): _index_name: Union[str, None] OpenSearch index name. """ + def __init__( - self, - client: Union[OpenSearch, None] = None, - index_name: Union[str, None] = None, - dict: Union[dict, None] = None + self, + client: Union[OpenSearch, None] = None, + index_name: Union[str, None] = None, + dict: Union[dict, None] = None, ): """ Initialises class. @@ -161,7 +161,7 @@ def from_atoms( index_name: str, atoms: Atoms, extra_info: Union[dict, None] = None, - store_calc: bool = True + store_calc: bool = True, ) -> AtomsModel: """ Reads and prepares atoms data and extra information for OpenSearch. @@ -213,7 +213,7 @@ def save(self): if not self._id: self._client.index(index=self._index_name, body=body) else: - body.pop('_id', None) + body.pop("_id", None) body = {"doc": body} self._client.update(index=self._index_name, id=self._id, body=body) @@ -252,7 +252,7 @@ def __init__( username: str = "admin", password: str = "admin", analyse_schema: bool = True, - **kwargs + **kwargs, ): """ Initialises class. @@ -326,13 +326,10 @@ def info(self): "db": self.db, "index": self.index_name, "number of confs": self.client.count(index=self.index_name)["count"], - "type": "opensearch" + "type": "opensearch", } - def delete( - self, - query: Union[dict, str, None] = None - ): + def delete(self, query: Union[dict, str, None] = None): """ Deletes documents from the database. @@ -342,9 +339,7 @@ def delete( Query to filter documents to be deleted. Default is `None`. """ query = self.parser(query) - body = { - "query": query - } + body = {"query": query} self.client.delete_by_query( index=self.index_name, @@ -380,7 +375,7 @@ def push( self, atoms: Union[Atoms, Iterable], extra_info: Union[dict, str, None] = None, - store_calc: bool = True + store_calc: bool = True, ): """ Save data from atoms object(s) to database. 
@@ -396,15 +391,15 @@ def push( Default is `True`. """ if extra_info and isinstance(extra_info, str): - extra_info = extras.parser.parse(extra_info) # type: ignore + extra_info = extras.parser.parse(extra_info) # type: ignore if isinstance(atoms, Atoms): data = AtomsModel.from_atoms( self.client, self.index_name, atoms, - extra_info=extra_info, # type: ignore - store_calc=store_calc + extra_info=extra_info, # type: ignore + store_calc=store_calc, ) data.save() @@ -415,8 +410,8 @@ def push( self.client, self.index_name, item, - extra_info=extra_info, # type: ignore - store_calc=store_calc + extra_info=extra_info, # type: ignore + store_calc=store_calc, ) actions.append(data.data) actions[-1]["derived"] = data.derived @@ -426,7 +421,7 @@ def upload( self, file: Path, extra_infos: Union[Iterable, dict, None] = None, - store_calc: bool = True + store_calc: bool = True, ): """ Upload data from a file to the database. @@ -449,7 +444,7 @@ def upload( extra_info = {} if extra_infos: for info in extra_infos: - extra_info.update(extras.parser.parse(info)) # type: ignore + extra_info.update(extras.parser.parse(info)) # type: ignore extra_info["filename"] = str(file) @@ -457,8 +452,7 @@ def upload( self.push(data, extra_info, store_calc=store_calc) def get_items( - self, - query: Union[dict, str, None] = None + self, query: Union[dict, str, None] = None ) -> Generator[dict, None, None]: """ Get data as a dictionary from documents in the database. @@ -482,11 +476,10 @@ def get_items( index=self.index_name, query=query, ): - yield {'_id': hit['_id'], **hit['_source']} + yield {"_id": hit["_id"], **hit["_source"]} def get_atoms( - self, - query: Union[dict, str, None] = None + self, query: Union[dict, str, None] = None ) -> Generator[Atoms, None, None]: """ Get data as Atoms object from documents in the database. 
@@ -512,10 +505,7 @@ def get_atoms( ): yield AtomsModel(None, None, hit["_source"]).to_ase() - def count( - self, - query: Union[dict, str, None] = None - ) -> int: + def count(self, query: Union[dict, str, None] = None) -> int: """ Counts number of documents in the database. @@ -556,13 +546,16 @@ def property(self, name, query: Union[dict, str, None] = None) -> list: "query": query, } - return [hit["_source"][format(name)] for hit in helpers.scan( - self.client, - index=self.index_name, - query=query, - stored_fields=format(name), - _source=format(name), - )] + return [ + hit["_source"][format(name)] + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + stored_fields=format(name), + _source=format(name), + ) + ] def count_property(self, name, query: Union[dict, str, None] = None) -> dict: """ @@ -583,24 +576,23 @@ def count_property(self, name, query: Union[dict, str, None] = None) -> dict: query = self.parser(query) body = { - "size" : 0, + "size": 0, "query": query, "aggs": { format(name): { "terms": { "field": format(name), - "size": 10000, # Use composite for all results? + "size": 10000, # Use composite for all results? 
}, }, - } + }, } prop = {} - for val in self.client.search( - index=self.index_name, - body=body, - )["aggregations"][format(name)]["buckets"]: + for val in self.client.search(index=self.index_name, body=body,)[ + "aggregations" + ][format(name)]["buckets"]: prop[val["key"]] = val["doc_count"] return prop @@ -625,36 +617,24 @@ def properties(self, query: Union[dict, str, None] = None) -> dict: properties = {} - for prop in self.client.indices.get_mapping( - index=self.index_name - )[self.index_name]["mappings"]["properties"].keys(): + for prop in self.client.indices.get_mapping(index=self.index_name)[ + self.index_name + ]["mappings"]["properties"].keys(): body = { - "size" : 0, + "size": 0, "query": query, "aggs": { "info_keys": { - "filter": { - "term": { - "derived.info_keys.keyword": prop - } - }, + "filter": {"term": {"derived.info_keys.keyword": prop}}, }, "derived_keys": { - "filter": { - "term": { - "derived.derived_keys.keyword": prop - } - }, + "filter": {"term": {"derived.derived_keys.keyword": prop}}, }, "arrays_keys": { - "filter": { - "term": { - "derived.arrays_keys.keyword": prop - } - }, + "filter": {"term": {"derived.arrays_keys.keyword": prop}}, }, - } + }, } res = self.client.search( @@ -693,21 +673,16 @@ def get_type_of_property(self, prop: str, category: str) -> str: # TODO: Probably it would be nicer to store the type info in the database from the beginning. 
atoms = self.client.search( index=self.index_name, - body = { - "size" : 1, - "query": { - "exists" : { - "field": prop - } - } - } + body={"size": 1, "query": {"exists": {"field": prop}}}, ) data = atoms["hits"]["hits"][0]["_source"][prop] if category == "arrays": if type(data[0]) == list: - return "array({}, N x {})".format(map_types[type(data[0][0])], len(data[0])) + return "array({}, N x {})".format( + map_types[type(data[0][0])], len(data[0]) + ) else: return "vector({}, N)".format(map_types[type(data[0])]) @@ -740,40 +715,28 @@ def count_properties(self, query: Union[dict, str, None] = None) -> dict: properties = {} try: - keys = self.client.indices.get_mapping( - index=self.index_name - )[self.index_name]["mappings"]["properties"].keys() + keys = self.client.indices.get_mapping(index=self.index_name)[ + self.index_name + ]["mappings"]["properties"].keys() except KeyError: return properties for key in keys: body = { - "size" : 0, + "size": 0, "query": query, "aggs": { "info_keys": { - "filter": { - "term": { - "derived.info_keys.keyword": key - } - }, + "filter": {"term": {"derived.info_keys.keyword": key}}, }, "derived_keys": { - "filter": { - "term": { - "derived.derived_keys.keyword": key - } - }, + "filter": {"term": {"derived.derived_keys.keyword": key}}, }, "arrays_keys": { - "filter": { - "term": { - "derived.arrays_keys.keyword": key - } - }, + "filter": {"term": {"derived.arrays_keys.keyword": key}}, }, - } + }, } res = self.client.search( @@ -788,7 +751,7 @@ def count_properties(self, query: Union[dict, str, None] = None) -> dict: properties[key] = { "count": count, "category": label.split("_")[0], - "dtype": self.get_type_of_property(key, label.split("_")[0]) + "dtype": self.get_type_of_property(key, label.split("_")[0]), } return properties @@ -804,7 +767,7 @@ def add_property(self, data: dict, query: Union[dict, str, None] = None): query: Union[dict, str, None] Query to filter documents to add properties to. Default is `None`. 
""" - logger.info('add: data={}, query={}'.format(data, query)) + logger.info("add: data={}, query={}".format(data, query)) query = self.parser(query) script_txt = "ctx._source.derived.info_keys.addAll(params.keys);" @@ -815,11 +778,9 @@ def add_property(self, data: dict, query: Union[dict, str, None] = None): "script": { "source": script_txt, "lang": "painless", - "params" : { - "keys" : list(data.keys()) - }, + "params": {"keys": list(data.keys())}, }, - "query": query + "query": query, } self.client.update_by_query( @@ -828,10 +789,7 @@ def add_property(self, data: dict, query: Union[dict, str, None] = None): ) def rename_property( - self, - name: str, - new_name: str, - query: Union[dict, str, None] = None + self, name: str, new_name: str, query: Union[dict, str, None] = None ): """ Renames property for all matching documents. @@ -845,11 +803,14 @@ def rename_property( query: Union[dict, str, None] Query to filter documents to rename property. Default is `None`. """ - logger.info('rename: query={}, old={}, new={}'.format(query, name, new_name)) + logger.info("rename: query={}, old={}, new={}".format(query, name, new_name)) query = self.parser(query) script_txt = f"if (!ctx._source.containsKey('{new_name}')) {{ " - script_txt += f"ctx._source.{new_name} = ctx._source.{name}; ctx._source.remove('params.name');" + script_txt += ( + f"ctx._source.{new_name} = ctx._source.{name};" + " ctx._source.remove('params.name');" + ) script_txt += f"for (int i=0; i Union[dict, None]: """ Calculate histogram statistics for a property from all matching documents. 
@@ -949,9 +898,11 @@ def __repr__(self): else: host, port = None, None - return "{}(".format(self.__class__.__name__) + \ - "url={}:{}, ".format(host, port) + \ - "index={}) ".format(self.index_name) + return ( + "{}(".format(self.__class__.__name__) + + "url={}:{}, ".format(host, port) + + "index={}) ".format(self.index_name) + ) def _repr_html_(self): """ @@ -967,9 +918,13 @@ def print_info(self): """ Show basic information about the connected OpenSearch database. """ - out = linesep.join(["{:=^50}".format(" ABCD OpenSearch "), - "{:>10}: {}".format("type", "opensearch"), - linesep.join("{:>10}: {}".format(k, v) for k, v in self.info().items())]) + out = linesep.join( + [ + "{:=^50}".format(" ABCD OpenSearch "), + "{:>10}: {}".format("type", "opensearch"), + linesep.join("{:>10}: {}".format(k, v) for k, v in self.info().items()), + ] + ) print(out) @@ -993,26 +948,36 @@ def histogram(name, data, **kwargs): return None if ptype == float: - bins = kwargs.get('bins', 10) + bins = kwargs.get("bins", 10) return _hist_float(name, data, bins) elif ptype == int: - bins = kwargs.get('bins', 10) + bins = kwargs.get("bins", 10) return _hist_int(name, data, bins) elif ptype == str: return _hist_str(name, data, **kwargs) elif ptype == datetime: - bins = kwargs.get('bins', 10) + bins = kwargs.get("bins", 10) return _hist_date(name, data, bins) else: - print('{}: Histogram for list of {} types are not supported!'.format(name, type(data[0]))) - logger.info('{}: Histogram for list of {} types are not supported!'.format(name, type(data[0]))) + print( + "{}: Histogram for list of {} types are not supported!".format( + name, type(data[0]) + ) + ) + logger.info( + "{}: Histogram for list of {} types are not supported!".format( + name, type(data[0]) + ) + ) else: - logger.info('{}: Histogram for {} types are not supported!'.format(name, type(data))) + logger.info( + "{}: Histogram for {} types are not supported!".format(name, type(data)) + ) return None @@ -1021,16 +986,16 @@ def 
_hist_float(name, data, bins=10): hist, bin_edges = np.histogram(data, bins=bins) return { - 'type': 'hist_float', - 'name': name, - 'bins': bins, - 'edges': bin_edges, - 'counts': hist, - 'min': data.min(), - 'max': data.max(), - 'median': data.mean(), - 'std': data.std(), - 'var': data.var() + "type": "hist_float", + "name": name, + "bins": bins, + "edges": bin_edges, + "counts": hist, + "min": data.min(), + "max": data.max(), + "median": data.mean(), + "std": data.std(), + "var": data.var(), } @@ -1041,16 +1006,16 @@ def _hist_date(name, data, bins=10): fromtimestamp = datetime.fromtimestamp return { - 'type': 'hist_date', - 'name': name, - 'bins': bins, - 'edges': [fromtimestamp(d) for d in bin_edges], - 'counts': hist, - 'min': fromtimestamp(hist_data.min()), - 'max': fromtimestamp(hist_data.max()), - 'median': fromtimestamp(hist_data.mean()), - 'std': fromtimestamp(hist_data.std()), - 'var': fromtimestamp(hist_data.var()) + "type": "hist_date", + "name": name, + "bins": bins, + "edges": [fromtimestamp(d) for d in bin_edges], + "counts": hist, + "min": fromtimestamp(hist_data.min()), + "max": fromtimestamp(hist_data.max()), + "median": fromtimestamp(hist_data.mean()), + "std": fromtimestamp(hist_data.std()), + "var": fromtimestamp(hist_data.var()), } @@ -1064,16 +1029,16 @@ def _hist_int(name, data, bins=10): hist, bin_edges = np.histogram(data, bins=bins) return { - 'type': 'hist_int', - 'name': name, - 'bins': bins, - 'edges': bin_edges, - 'counts': hist, - 'min': data.min(), - 'max': data.max(), - 'median': data.mean(), - 'std': data.std(), - 'var': data.var() + "type": "hist_int", + "name": name, + "bins": bins, + "edges": bin_edges, + "counts": hist, + "min": data.min(), + "max": data.max(), + "median": data.mean(), + "std": data.std(), + "var": data.var(), } @@ -1082,7 +1047,9 @@ def _hist_str(name, data, bins=10, truncate=20): if truncate: # data = (item[:truncate] for item in data) - data = (item[:truncate] + '...' 
if len(item) > truncate else item for item in data) + data = ( + item[:truncate] + "..." if len(item) > truncate else item for item in data + ) data = Counter(data) @@ -1092,12 +1059,12 @@ def _hist_str(name, data, bins=10, truncate=20): labels, counts = zip(*data.items()) return { - 'type': 'hist_str', - 'name': name, - 'total': sum(data.values()), - 'unique': n_unique, - 'labels': labels[:bins], - 'counts': counts[:bins] + "type": "hist_str", + "name": name, + "total": sum(data.values()), + "unique": n_unique, + "labels": labels[:bins], + "counts": counts[:bins], } diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py index 791e0003..b3424c5a 100644 --- a/abcd/backends/atoms_properties.py +++ b/abcd/backends/atoms_properties.py @@ -4,7 +4,8 @@ from typing import Union from pathlib import Path -class Properties(): + +class Properties: """ Wrapper to identify and manipulate properties to be passed as extra_info to the database. @@ -26,6 +27,7 @@ class Properties(): struct_files: list[str] List containing a filename for each structure in the dataframe. """ + def __init__( self, csv_file: Union[str, Path], @@ -33,7 +35,7 @@ def __init__( struct_file_template: Union[str, None] = None, struct_name_label: Union[str, None] = None, units: Union[dict, None] = None, - infer_units: bool = False + infer_units: bool = False, ): """ Initialises class. @@ -67,10 +69,10 @@ def __init__( if units is not None: for key in units.keys(): if key not in self.df.columns.values: - raise ValueError(( + raise ValueError( f"Invalid field name: {key}. Keys in `units` must " - f"correspond to field names in the loaded data." - )) + "correspond to field names in the loaded data." 
+ ) self.units = units elif infer_units: self._separate_units() @@ -82,15 +84,16 @@ def __init__( if struct_file_template is not None: self.struct_file_template = struct_file_template else: - raise ValueError(( + raise ValueError( "`struct_file_template` must be specified if " "store_struct_file is True." - )) + ) if struct_name_label is not None: self.struct_name_label = struct_name_label else: raise ValueError( - "`struct_name_label` must be specified if store_struct_file is True." + "`struct_name_label` must be specified if store_struct_file is" + " True." ) self.set_struct_files() @@ -124,34 +127,34 @@ def set_struct_files(self): try: struct_name = self.df.iloc[i][self.struct_name_label] except KeyError as e: - raise ValueError(( + raise ValueError( f"{self.struct_name_label} is not a valid column in " - f"the data loaded." - )) + "the data loaded." + ) struct_file = self.get_struct_file(struct_name) self.struct_files.append(struct_file) def get_struct_file(self, struct_name: str) -> str: """ - Evaluate struct_file_template to determine structure filename - for current structure. + Evaluate struct_file_template to determine structure filename + for current structure. - Parameters - ---------- - struct_name: str - Name of current structure. + Parameters + ---------- + struct_name: str + Name of current structure. - Returns - ------- - Filename for the current structure. + Returns + ------- + Filename for the current structure. 
""" if struct_name is None: raise ValueError("`struct_name` must be specified") if "{struct_name}" not in self.struct_file_template: - raise ValueError(( - f"'struct_name' must be a variable in the template file: " + raise ValueError( + "'struct_name' must be a variable in the template file: " f"{self.struct_file_template}" - )) + ) else: return eval(f"f'{self.struct_file_template}'") @@ -171,4 +174,4 @@ def to_list(self) -> list[dict]: properties_list.append( {key: value for key, value in properties.items() if value is not None} ) - return properties_list \ No newline at end of file + return properties_list diff --git a/abcd/parsers/extras.py b/abcd/parsers/extras.py index 08bae00f..e6d1bc10 100644 --- a/abcd/parsers/extras.py +++ b/abcd/parsers/extras.py @@ -108,8 +108,9 @@ def string(self, s): "scientific_float_2=5e-6", 'scientific_float_array="1.2 2.2e3 4e1 3.3e-1 2e-2"', 'not_array="1.2 3.4 text"', - "array_nested=[[1,2],[3,4]] " # gets flattented if not 3x3 - "array_many_other_quotes=({[4 8 12]})", + ( # gets flattented if not 3x3 + "array_nested=[[1,2],[3,4]] array_many_other_quotes=({[4 8 12]})" + ), "array_boolean={T F T F}", 'array_boolean_2=" T, F, T " ' # leading spaces # 'not_bool_array=[T F S]', diff --git a/abcd/server/app/views/database.py b/abcd/server/app/views/database.py index 607dc8b8..25d18f86 100644 --- a/abcd/server/app/views/database.py +++ b/abcd/server/app/views/database.py @@ -20,7 +20,10 @@ def database(database_name): info = { "name": database_name, - "description": "Vivamus sagittis lacus vel augue laoreet rutrum faucibus dolor auctor. Duis mollis, est non commodo luctus.", + "description": ( + "Vivamus sagittis lacus vel augue laoreet rutrum faucibus dolor auctor." + " Duis mollis, est non commodo luctus." 
+ ), "columns": [ {"slug": "formula", "name": "Formula"}, {"slug": "energy", "name": "Energy"}, @@ -42,7 +45,10 @@ def database(database_name): def settings(database_name): info = { "name": database_name, - "description": "Vivamus sagittis lacus vel augue laoreet rutrum faucibus dolor auctor. Duis mollis, est non commodo luctus.", + "description": ( + "Vivamus sagittis lacus vel augue laoreet rutrum faucibus dolor auctor." + " Duis mollis, est non commodo luctus." + ), "columns": [ {"slug": "formula", "name": "Formula"}, {"slug": "energy", "name": "Energy"}, diff --git a/tests/database.py b/tests/database.py index a4184846..3733e6a9 100644 --- a/tests/database.py +++ b/tests/database.py @@ -32,8 +32,13 @@ def test_push(self): xyz = StringIO( """2 Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" +<<<<<<< HEAD Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 +======= + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 +>>>>>>> c962bfe (Apply black formatting) """ ) diff --git a/tests/properties.py b/tests/properties.py index d7ae9508..eebbdc6a 100644 --- a/tests/properties.py +++ b/tests/properties.py @@ -1,6 +1,7 @@ import unittest from abcd.backends.atoms_properties import Properties + class PropertiesTests(unittest.TestCase): """Testing properties csv reader""" @@ -10,6 +11,7 @@ def setUpClass(cls): Load example csv file. """ import os + class_path = os.path.normpath(os.path.abspath(__file__)) csv_file = os.path.dirname(class_path) + "/examples.csv" cls.property = Properties(csv_file) @@ -19,6 +21,7 @@ def test_dataframe(self): Test data correctly stored in pandas DataFrame. """ from pandas import DataFrame + assert isinstance(self.property.df, DataFrame) assert len(self.property.df) == 3 @@ -39,7 +42,7 @@ def test_specify_units(self): csv_file=self.property.csv_file, units=input_units_2, ) - + def test_infer_units(self): """ Test units can be inferred from field names. 
@@ -48,10 +51,7 @@ def test_infer_units(self): csv_file=self.property.csv_file, infer_units=True, ) - expected_units = { - "Comma units": "m", - "Bracket units": "s" - } + expected_units = {"Comma units": "m", "Bracket units": "s"} expected_fields = [ "Text", "Integers", @@ -59,7 +59,7 @@ def test_infer_units(self): "Boolean", "Missing data", "Comma units", - "Bracket units" + "Bracket units", ] self.assertEqual(properties.units, expected_units) self.assertEqual(list(properties.df.columns.values), expected_fields) @@ -74,12 +74,12 @@ def test_struct_file(self): csv_file=self.property.csv_file, store_struct_file=True, struct_file_template=struct_file_template, - struct_name_label=struct_name_label + struct_name_label=struct_name_label, ) expected_struct_files = [ "test_Some_file.txt", "test_test_file.txt", - "test_data_file.txt" + "test_data_file.txt", ] self.assertIsInstance(properties_1.struct_files, list) for i, file in enumerate(expected_struct_files): @@ -91,7 +91,7 @@ def test_struct_file(self): csv_file=self.property.csv_file, store_struct_file=True, struct_file_template=invalid_template, - struct_name_label=struct_name_label + struct_name_label=struct_name_label, ) invalid_label = "label" @@ -100,7 +100,7 @@ def test_struct_file(self): csv_file=self.property.csv_file, store_struct_file=True, struct_file_template=struct_file_template, - struct_name_label=invalid_label + struct_name_label=invalid_label, ) def test_to_list(self): @@ -117,7 +117,7 @@ def test_to_list(self): "Boolean": True, "Missing data": "Missing", "Comma units, m": 0, - "Bracket units (s)": 0 + "Bracket units (s)": 0, } self.assertEqual(self.property.to_list()[0], expected_property) @@ -131,7 +131,7 @@ def test_missing_data(self): "Floating": 0.1, "Boolean": False, "Comma units, m": 1, - "Bracket units (s)": 1 + "Bracket units (s)": 1, } self.assertEqual(self.property.to_list()[1], expected_property) @@ -143,10 +143,7 @@ def test_to_list_units(self): csv_file=self.property.csv_file, 
infer_units=True, ) - expected_units = { - "Comma units": "m", - "Bracket units": "s" - } + expected_units = {"Comma units": "m", "Bracket units": "s"} expected_property = { "Text": "Some", "Integers": 1, @@ -155,9 +152,10 @@ def test_to_list_units(self): "Missing data": "Missing", "Comma units": 0, "Bracket units": 0, - "units": expected_units + "units": expected_units, } self.assertEqual(properties_1.to_list()[0], expected_property) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_parsers.py b/tests/test_parsers.py index a4a6f7ea..07cab3f0 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -179,12 +179,6 @@ def test_operators(self, parser, string, expected): """Operators""" assert parser.parse(string) - - def test_colon_key_value_pairs(self, parser, string, expected): - """Key value pairs separated by colons""" - assert expected == parser.parse(string) - - @pytest.mark.parametrize( "string, expected", [ From ec82064d806f8a9bbdea46cc3a0a50cbe7c1192f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 25 Aug 2023 15:38:21 +0000 Subject: [PATCH 031/112] Conform to flake8 style --- abcd/backends/atoms_opensearch.py | 18 +++++++++--------- abcd/backends/atoms_properties.py | 2 +- tests/database.py | 17 ++++++----------- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 36d4528a..cc1934e2 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -670,7 +670,8 @@ def get_type_of_property(self, prop: str, category: str) -> str: ------- Type of the property. """ - # TODO: Probably it would be nicer to store the type info in the database from the beginning. + # TODO: Probably it would be nicer to store the type info in the database + # from the beginning. 
atoms = self.client.search( index=self.index_name, body={"size": 1, "query": {"exists": {"field": prop}}}, @@ -810,12 +811,11 @@ def rename_property( script_txt += ( f"ctx._source.{new_name} = ctx._source.{name};" " ctx._source.remove('params.name');" + " for (int i=0; i>>>>>> c962bfe (Apply black formatting) """ ) @@ -92,7 +87,7 @@ def test_push(self): self.abcd.create() xyz_1 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ @@ -104,7 +99,7 @@ def test_push(self): xyz_2 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" W 0.00000000 0.00000000 0.00000000 W 0.00000000 0.00000000 0.00000000 """ @@ -131,7 +126,7 @@ def test_bulk(self): self.abcd.create() xyz_1 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ @@ -142,7 +137,7 @@ def test_bulk(self): xyz_2 = StringIO( """1 - Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 """ ) @@ -178,7 +173,7 @@ def test_count(self): self.abcd.create() xyz = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t e s t _ s t r" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ From 462325f17ea263450238432aba8bb47913daa278 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 30 Aug 2023 08:36:10 +0000 Subject: [PATCH 032/112] Add notebook for 
extra information examples --- tutorials/abcd_opensearch_properties.ipynb | 644 +++++++++++++++++++++ tutorials/abcd_opensearch_uploading.ipynb | 2 +- tutorials/abcd_uploading.ipynb | 2 +- 3 files changed, 646 insertions(+), 2 deletions(-) create mode 100644 tutorials/abcd_opensearch_properties.ipynb diff --git a/tutorials/abcd_opensearch_properties.ipynb b/tutorials/abcd_opensearch_properties.ipynb new file mode 100644 index 00000000..400e36f8 --- /dev/null +++ b/tutorials/abcd_opensearch_properties.ipynb @@ -0,0 +1,644 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "# Usage of ABCD database with extra information" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "is_executing": false + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from abcd import ABCD\n", + "from abcd.backends.atoms_properties import Properties" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "First of all, we need to define the url of the database. It could be local or remote:\n", + "\n", + "- direct access: url = 'opensearch://admin:admin@localhost:9200'\n", + "- api access: url = 'http://localhost/api'\n", + "\n", + "using with statement to catch the riased exceptions. You may can ignore them but in that case need to handle all the unexpected events. (cannot connect to db, lost connection, wrong filter, wrong url, etc. 
)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenSearchDatabase(url=localhost:9200, index=atoms) \n" + ] + } + ], + "source": [ + "url = 'opensearch://admin:admin@localhost:9200'\n", + "abcd = ABCD.from_url(url)\n", + "\n", + "print(abcd)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 2056\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Cleanup \n", + "\n", + "WARNING!! Remove all elements from the database.\n", + "Only supported in the case of local access" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.destroy()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.create()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 0\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Uploading configurations" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + 
"from ase.io import iread, read" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "is_executing": false, + "metadata": false, + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/ubuntu/abcd/tutorials\n" + ] + } + ], + "source": [ + "!pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "directory = Path('/home/ubuntu/data/')\n", + "file = directory / 'input.data.2055.xyz'" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 14.7 s, sys: 215 ms, total: 14.9 s\n", + "Wall time: 22.3 s\n" + ] + } + ], + "source": [ + "%%time\n", + "with abcd as db:\n", + " for atoms in iread(file.as_posix(), index=slice(None)):\n", + " db.push(atoms, store_calc=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Specify a csv file containing extra information, and a template for the structures corresponding to each row in the csv file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "pycharm": { + "is_executing": false, + "metadata": false, + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "directory = Path('/home/ubuntu/data/')\n", + "csv_file = directory / 'DATA_copy.csv'\n", + "struct_file_template = str(directory) + \"/{struct_name}_FSR-out.cif\"" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "pycharm": { + "is_executing": false + }, + "scrolled": false + }, + "outputs": [], + "source": [ + "properties = Properties(\n", + " csv_file=csv_file,\n", + " store_struct_file=True,\n", + " struct_file_template=struct_file_template,\n", + " struct_name_label = \"MOF_name\",\n", + " infer_units=True,\n", + " # units={\"Density\": \"g/cm3\"}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Upload each row, using the infered structure file:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "for i, data in enumerate(properties.to_list()):\n", + " if data['MOF_name'] == \"EWIKAX03\":\n", + " atoms = read(properties.struct_files[i])\n", + " with abcd as db:\n", + " db.push(\n", + " atoms,\n", + " store_calc=False,\n", + " extra_info=data,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Dos at Fermi energy': 'eln/cell',\n", + " 'Dos at VBM': 'eln/cell',\n", + " 'Dos at CBM': 'eln/cell',\n", + " 'Density': 'g/cm3',\n", + " 'Accessible Surface Area': 'm2/g'}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"units\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + 
" port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 2056\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'spacegroup': Spacegroup(1, setting=1), 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': 
{'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, '92': {'C': 1.0}, '93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': {'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, '138': {'Fe': 1.0}, '139': {'Fe': 1.0}}}\n" + ] + } + ], + "source": [ + "print(atoms.info)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "pycharm": {} + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'info': ['1aromatico-up', '2D', '2aromatici-up', '5-m-rings', '5m-ring-leg2met', '6m-rings', 'Accessible Surface Area', 'Band_gap', 'CN-M', 'COOM', 'Cell volume', 'Crit: metal', 'Crit: pi-pi stacking', 'Crit: redox active linker', 'Crit: redox match', 'Criteria#', 'Density', 'Dos at CBM', 'Dos at Fermi energy', 'Dos at VBM', 'HSE band gap', 'LCD', 'M-C-C-TRIANG', 'M-H2O-M', 'M-N-NM-N-M', 'M-h2o', 'MOF_name', 'Metal', 'Metal density', 'Metals number', 'Multiplier_Sum', 'N3--NCN up', 'PLD', 'Space_group', 'Space_group#', 'Temp', 'Volume Fraction', 'Year', 'Zprime', 'benzene', 'cell', 'energy', 'formula', 'metal-N', 'metal-O', 'metal-S', 'metal-halogen', 'n_atoms', 'occupancy', 'pbc', 'pyridine', 'pyrimidine', 'spacegroup', 'unit_cell', 'units', 'volume', 'without ions'], 'derived': ['elements', 
'hash', 'hash_structure', 'modified', 'uploaded', 'username', 'volume'], 'arrays': ['forces', 'numbers', 'positions', 'spacegroup_kinds']}\n" + ] + } + ], + "source": [ + "print(abcd.properties())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{29.868799209594727: 1}\n" + ] + } + ], + "source": [ + "# print(abcd.property(\"6m-rings\"))\n", + "print(abcd.count_property(\"Dos at Fermi energy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "{'_id': '30DtQ4oBtksDlC5rnd72', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 6.8505767864, 3.7879027402000007], [8.8195795246, 6.854754491399999, 6.227811509400001], [4.7265811346, 11.3359757278, 0.33047189900000007], [5.0450835578, 0.2940984957000009, 1.8464934612000004], [4.2515440136, 10.621206211199999, 1.9067817832000002], [11.252870301399998, 1.1909443325, 0.5637027724000001], [0.5665401247999999, 0.5403204921, 
1.8410076416000005], [10.8263776348, 1.4189276624999998, 2.290538534], [8.815592952400001, 11.0538732136, 3.7879027402000007], [8.8201204754, 11.0496955086, 6.227811509400001], [1.1533188654, 11.3359757278, 7.2921718989999995], [0.8348164422, 0.2940984957000009, 8.8081934612], [1.6283559864000001, 10.621206211199999, 8.8684817832], [6.386829698600001, 1.1909443325, 7.5254027724000006], [5.3133598752, 0.5403204921, 8.802707641600001], [6.8133223651999995, 1.4189276624999998, 9.252238534000004], [8.8241070476, 11.0538732136, 10.7496027402], [8.8195795246, 11.0496955086, 13.189511509400003], [4.7265811346, 6.568474272199999, 7.2921718989999995], [5.0450835578, 5.6740515043, 8.8081934612], [4.2515440136, 7.2832437888, 8.8684817832], [11.252870301399998, 4.7772056675, 7.5254027724000006], [0.5665401247999999, 5.427829507899999, 8.802707641600001], [10.8263776348, 4.549222337500001, 9.252238534000004], [8.815592952400001, 6.8505767864, 10.7496027402], [8.8201204754, 6.854754491399999, 13.189511509400003], [10.606481134600001, 0.6003242722, 6.631228101000001], [10.924983557800001, 11.642213440599999, 5.1152065388], [10.1314440136, 1.3150937887999998, 5.0549182168], [5.3729703014, 10.745355667500002, 6.397997227600001], [6.4464401248000005, 11.3959795079, 5.120692358400001], [4.9464776348, 10.5173723375, 4.671161466], [2.9356929524, 0.8824267863999998, 3.1737972598], [2.9402204754, 0.8866044913999999, 0.7338884906000002], [7.033218865399999, 5.3678257278, 6.631228101000001], [6.714716442199999, 6.2622365594, 5.1152065388], [7.508255986400001, 4.6530562112, 5.0549182168], [0.5069296986, 7.159094332499999, 6.397997227600001], [11.1932598752, 6.508470492099999, 5.120692358400001], [0.9334223652, 7.3870776624999985, 4.671161466], [2.9442070476, 5.0857232136, 3.1737972598], [2.9396795246000003, 5.0815455086, 0.7338884906000002], [10.606481134600001, 5.3678257278, 13.592928101000004], [10.924983557800001, 6.2622365594, 12.076906538800003], [10.1314440136, 4.6530562112, 
12.016618216800003], [5.3729703014, 7.159094332499999, 13.3596972276], [6.4464401248000005, 6.508470492099999, 12.0823923584], [4.9464776348, 7.3870776624999985, 11.632861466], [2.9356929524, 5.0857232136, 10.135497259800003], [2.9402204754, 5.0815455086, 7.6955884906000005], [7.033218865399999, 0.6003242722, 13.592928101000004], [6.714716442199999, 11.642213440599999, 12.076906538800003], [7.508255986400001, 1.3150937887999998, 12.016618216800003], [0.5069296986, 10.745355667500002, 13.3596972276], [11.1932598752, 11.3959795079, 12.0823923584], [0.9334223652, 10.5173723375, 11.632861466], [2.9442070476, 0.8824267863999998, 10.135497259800003], [2.9396795246000003, 0.8866044913999999, 7.6955884906000005], [11.3421624628, 7.352343029499999, 1.4700882656000003], [0.8413313713999999, 6.690045487699999, 1.3900565624000003], [7.1397744132, 6.4757292212, 1.541181146], [6.3813143524, 5.266367177799999, 1.5607156762000003], [8.816380858999999, 7.8135974704, 4.3231043128], [8.8125824436, 7.814814973, 5.689045546400001], [6.297537537200001, 10.552106970499999, 1.4700882656000003], [5.0385686286, 11.2144045123, 1.3900565624000003], [10.499925586800002, 11.428720778799999, 1.541181146], [11.2583856476, 0.7017828222, 1.5607156762000003], [8.823319141, 10.0908525296, 4.3231043128], [8.827117556400001, 10.089635027, 5.689045546400001], [11.3421624628, 10.552106970499999, 8.4317882656], [0.8413313713999999, 11.2144045123, 8.3517565624], [7.1397744132, 11.428720778799999, 8.502881146], [6.3813143524, 0.7017828222, 8.522415676200001], [8.816380858999999, 10.0908525296, 11.2848043128], [8.8125824436, 10.089635027, 12.6507455464], [6.297537537200001, 7.352343029499999, 8.4317882656], [5.0385686286, 6.690045487699999, 8.3517565624], [10.499925586800002, 6.4757292212, 8.502881146], [11.2583856476, 5.266367177799999, 8.522415676200001], [8.823319141, 7.8135974704, 11.2848043128], [8.827117556400001, 7.814814973, 12.6507455464], [0.41763753719999996, 1.3841930295, 5.491611734400001], 
[10.9184686286, 0.7218954876999999, 5.5716434376000015], [4.6200255868, 0.5075792212000001, 5.420518854], [5.378485647599999, 11.234517177799999, 5.400984323800001], [2.943419141, 1.8454474704, 2.6385956872000005], [2.9472175564, 1.8466649729999998, 1.2726544536000002], [5.4622624628, 4.583956970500001, 5.491611734400001], [6.7212313714, 5.246254512299999, 5.5716434376000015], [1.2598744132, 5.460570778799999, 5.420518854], [0.5014143524, 6.6699328222, 5.400984323800001], [2.936480859, 4.1227025296, 2.6385956872000005], [2.9326824436, 4.121485026999999, 1.2726544536000002], [0.41763753719999996, 4.583956970500001, 12.453311734400001], [10.9184686286, 5.246254512299999, 12.533343437600001], [4.6200255868, 5.460570778799999, 12.382218854000003], [5.378485647599999, 6.6699328222, 12.3626843238], [2.943419141, 4.1227025296, 9.600295687200003], [2.9472175564, 4.121485026999999, 8.234354453600004], [5.4622624628, 1.3841930295, 12.453311734400001], [6.7212313714, 0.7218954876999999, 12.533343437600001], [1.2598744132, 0.5075792212000001, 12.382218854000003], [0.5014143524, 11.234517177799999, 12.3626843238], [2.936480859, 1.8454474704, 9.600295687200003], [2.9326824436, 1.8466649729999998, 8.234354453600004], [10.3377932242, 7.937329156199999, 1.5133204226], [7.7886801772, 7.4419965787999995, 1.5317967744], [7.301906775799999, 9.9671208438, 1.5133204226], [9.851019822800001, 10.462453421200001, 1.5317967744], [10.3377932242, 9.9671208438, 8.475020422600002], [7.7886801772, 10.462453421200001, 8.4934967744], [7.301906775799999, 7.937329156199999, 8.475020422600002], [9.851019822800001, 7.4419965787999995, 8.4934967744], [8.81985, 8.952224999999999, 3.551246710400001], [8.81985, 8.952224999999999, 6.462337259000002], [8.81985, 8.952224999999999, 10.5129467104], [8.81985, 8.952224999999999, 13.424037259000002], [1.4220067758000001, 1.9691791562, 5.448379577400002], [3.9711198228, 1.4738465788, 5.429903225600001], [4.4578932242, 3.9989708437999996, 5.448379577400002], 
[1.9087801772000001, 4.4943034212, 5.429903225600001], [1.4220067758000001, 3.9989708437999996, 12.4100795774], [3.9711198228, 4.4943034212, 12.391603225600003], [4.4578932242, 1.9691791562, 12.4100795774], [1.9087801772000001, 1.4738465788, 12.391603225600003], [2.93995, 2.984075, 3.4104532896], [2.93995, 2.984075, 0.49936274100000005], [2.93995, 2.984075, 10.372153289600003], [2.93995, 2.984075, 7.461062741000001], [8.81985, 8.952224999999999, 1.5279956862], [8.81985, 8.952224999999999, 8.489695686200001], [2.93995, 2.984075, 5.433704313800002], [2.93995, 2.984075, 12.395404313800002]], 'spacegroup_kinds': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139], 'spacegroup': {'number': 1, 'setting': 1}, 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 
1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': {'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, '92': {'C': 1.0}, '93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': {'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, '138': {'Fe': 1.0}, '139': {'Fe': 1.0}}, 'MOF_name': 'EWIKAX03', 'Dos at Fermi energy': 29.8688, 'Band_gap': 0.004466204, 'Dos at VBM': 29.8688, 'Dos at CBM': 29.8688, 'HSE band gap': 'not 
found', 'LCD': '4.36078', 'PLD': '1.82778', 'Density': '1.696', 'Accessible Surface Area': '0', 'Volume Fraction': '0.51294', 'Criteria#': 2, 'Multiplier_Sum': '1', 'Space_group#': '14', 'Space_group': 'P21/c', 'Temp': '94', 'Zprime': '1', 'Year': '2008', 'Metal': 'Fe', 'Metals number': '1', 'Cell volume': '1954.407', 'Metal density': '0.000511664', 'Crit: metal': 'yes', 'Crit: redox match': 'yes', 'Crit: redox active linker': 'no', 'Crit: pi-pi stacking': 'no', 'without ions': 'no', '2D': 'no', '1aromatico-up': 'yes', '2aromatici-up': 'no', 'N3--NCN up': 'no', 'benzene': 'no', '6m-rings': 'yes', 'pyridine': 'no', 'pyrimidine': 'no', 'metal-S': 'no', 'metal-O': 'no', 'metal-N': 'yes', 'metal-halogen': 'no', '5m-ring-leg2met': 'no', '5-m-rings': 'no', 'CN-M': 'yes', 'M-h2o': 'no', 'M-H2O-M': 'no', 'M-N-NM-N-M': 'no', 'M-C-C-TRIANG': 'no', 'COOM': 'no', 'units': {'Dos at Fermi energy': 'eln/cell', 'Dos at VBM': 'eln/cell', 'Dos at CBM': 'eln/cell', 'Density': 'g/cm3', 'Accessible Surface Area': 'm2/g'}, 'volume': 1954.406783203316, 'elements': {'1': 64, '6': 48, '7': 24, '26': 4}, 'username': 'ubuntu', 'uploaded': '2023-08-30T00:52:40.560182', 'modified': '2023-08-30T00:52:40.560190', 'hash_structure': 'f6bbc3390f53917f801298dc22827262', 'hash': '0cd93d1155d74a60380a615249629708', 'derived': {'arrays_keys': ['positions', 'numbers', 'spacegroup_kinds'], 'info_keys': ['unit_cell', 'n_atoms', 'occupancy', 'spacegroup', 'cell', 'formula', 'pbc', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 
'metal-O', 'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n" + ] + } + ], + "source": [ + "query = 'n_atoms: 140'\n", + "print(len(list(abcd.get_items(query))))\n", + "print(list(abcd.get_items(query))[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "query = 'Accessible Surface*'\n", + "print(len(list(abcd.get_items(query))))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "query = 'Year: [2006 TO 2009]'\n", + "print(len(list(abcd.get_items(query))))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2056\n" + ] + } + ], + "source": [ + "query = '*ubuntu'\n", + "print(len(list(abcd.get_items(query))))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "316\n" + ] + } + ], + "source": [ + "query = 'username:[ubunta TO ubuntx] AND formula:?48H28O32Zr6'\n", + "print(len(list(abcd.get_items(query))))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "{'_id': '30DtQ4oBtksDlC5rnd72', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 6.8505767864, 3.7879027402000007], [8.8195795246, 6.854754491399999, 6.227811509400001], [4.7265811346, 11.3359757278, 0.33047189900000007], [5.0450835578, 0.2940984957000009, 1.8464934612000004], [4.2515440136, 10.621206211199999, 1.9067817832000002], [11.252870301399998, 1.1909443325, 0.5637027724000001], [0.5665401247999999, 0.5403204921, 1.8410076416000005], [10.8263776348, 1.4189276624999998, 2.290538534], [8.815592952400001, 11.0538732136, 3.7879027402000007], [8.8201204754, 11.0496955086, 6.227811509400001], [1.1533188654, 11.3359757278, 7.2921718989999995], [0.8348164422, 0.2940984957000009, 8.8081934612], [1.6283559864000001, 10.621206211199999, 8.8684817832], [6.386829698600001, 1.1909443325, 7.5254027724000006], [5.3133598752, 0.5403204921, 8.802707641600001], [6.8133223651999995, 1.4189276624999998, 9.252238534000004], [8.8241070476, 11.0538732136, 10.7496027402], [8.8195795246, 11.0496955086, 13.189511509400003], [4.7265811346, 6.568474272199999, 7.2921718989999995], [5.0450835578, 5.6740515043, 8.8081934612], [4.2515440136, 7.2832437888, 8.8684817832], [11.252870301399998, 4.7772056675, 7.5254027724000006], [0.5665401247999999, 5.427829507899999, 8.802707641600001], [10.8263776348, 4.549222337500001, 9.252238534000004], [8.815592952400001, 6.8505767864, 10.7496027402], 
[8.8201204754, 6.854754491399999, 13.189511509400003], [10.606481134600001, 0.6003242722, 6.631228101000001], [10.924983557800001, 11.642213440599999, 5.1152065388], [10.1314440136, 1.3150937887999998, 5.0549182168], [5.3729703014, 10.745355667500002, 6.397997227600001], [6.4464401248000005, 11.3959795079, 5.120692358400001], [4.9464776348, 10.5173723375, 4.671161466], [2.9356929524, 0.8824267863999998, 3.1737972598], [2.9402204754, 0.8866044913999999, 0.7338884906000002], [7.033218865399999, 5.3678257278, 6.631228101000001], [6.714716442199999, 6.2622365594, 5.1152065388], [7.508255986400001, 4.6530562112, 5.0549182168], [0.5069296986, 7.159094332499999, 6.397997227600001], [11.1932598752, 6.508470492099999, 5.120692358400001], [0.9334223652, 7.3870776624999985, 4.671161466], [2.9442070476, 5.0857232136, 3.1737972598], [2.9396795246000003, 5.0815455086, 0.7338884906000002], [10.606481134600001, 5.3678257278, 13.592928101000004], [10.924983557800001, 6.2622365594, 12.076906538800003], [10.1314440136, 4.6530562112, 12.016618216800003], [5.3729703014, 7.159094332499999, 13.3596972276], [6.4464401248000005, 6.508470492099999, 12.0823923584], [4.9464776348, 7.3870776624999985, 11.632861466], [2.9356929524, 5.0857232136, 10.135497259800003], [2.9402204754, 5.0815455086, 7.6955884906000005], [7.033218865399999, 0.6003242722, 13.592928101000004], [6.714716442199999, 11.642213440599999, 12.076906538800003], [7.508255986400001, 1.3150937887999998, 12.016618216800003], [0.5069296986, 10.745355667500002, 13.3596972276], [11.1932598752, 11.3959795079, 12.0823923584], [0.9334223652, 10.5173723375, 11.632861466], [2.9442070476, 0.8824267863999998, 10.135497259800003], [2.9396795246000003, 0.8866044913999999, 7.6955884906000005], [11.3421624628, 7.352343029499999, 1.4700882656000003], [0.8413313713999999, 6.690045487699999, 1.3900565624000003], [7.1397744132, 6.4757292212, 1.541181146], [6.3813143524, 5.266367177799999, 1.5607156762000003], [8.816380858999999, 7.8135974704, 
4.3231043128], [8.8125824436, 7.814814973, 5.689045546400001], [6.297537537200001, 10.552106970499999, 1.4700882656000003], [5.0385686286, 11.2144045123, 1.3900565624000003], [10.499925586800002, 11.428720778799999, 1.541181146], [11.2583856476, 0.7017828222, 1.5607156762000003], [8.823319141, 10.0908525296, 4.3231043128], [8.827117556400001, 10.089635027, 5.689045546400001], [11.3421624628, 10.552106970499999, 8.4317882656], [0.8413313713999999, 11.2144045123, 8.3517565624], [7.1397744132, 11.428720778799999, 8.502881146], [6.3813143524, 0.7017828222, 8.522415676200001], [8.816380858999999, 10.0908525296, 11.2848043128], [8.8125824436, 10.089635027, 12.6507455464], [6.297537537200001, 7.352343029499999, 8.4317882656], [5.0385686286, 6.690045487699999, 8.3517565624], [10.499925586800002, 6.4757292212, 8.502881146], [11.2583856476, 5.266367177799999, 8.522415676200001], [8.823319141, 7.8135974704, 11.2848043128], [8.827117556400001, 7.814814973, 12.6507455464], [0.41763753719999996, 1.3841930295, 5.491611734400001], [10.9184686286, 0.7218954876999999, 5.5716434376000015], [4.6200255868, 0.5075792212000001, 5.420518854], [5.378485647599999, 11.234517177799999, 5.400984323800001], [2.943419141, 1.8454474704, 2.6385956872000005], [2.9472175564, 1.8466649729999998, 1.2726544536000002], [5.4622624628, 4.583956970500001, 5.491611734400001], [6.7212313714, 5.246254512299999, 5.5716434376000015], [1.2598744132, 5.460570778799999, 5.420518854], [0.5014143524, 6.6699328222, 5.400984323800001], [2.936480859, 4.1227025296, 2.6385956872000005], [2.9326824436, 4.121485026999999, 1.2726544536000002], [0.41763753719999996, 4.583956970500001, 12.453311734400001], [10.9184686286, 5.246254512299999, 12.533343437600001], [4.6200255868, 5.460570778799999, 12.382218854000003], [5.378485647599999, 6.6699328222, 12.3626843238], [2.943419141, 4.1227025296, 9.600295687200003], [2.9472175564, 4.121485026999999, 8.234354453600004], [5.4622624628, 1.3841930295, 12.453311734400001], 
[6.7212313714, 0.7218954876999999, 12.533343437600001], [1.2598744132, 0.5075792212000001, 12.382218854000003], [0.5014143524, 11.234517177799999, 12.3626843238], [2.936480859, 1.8454474704, 9.600295687200003], [2.9326824436, 1.8466649729999998, 8.234354453600004], [10.3377932242, 7.937329156199999, 1.5133204226], [7.7886801772, 7.4419965787999995, 1.5317967744], [7.301906775799999, 9.9671208438, 1.5133204226], [9.851019822800001, 10.462453421200001, 1.5317967744], [10.3377932242, 9.9671208438, 8.475020422600002], [7.7886801772, 10.462453421200001, 8.4934967744], [7.301906775799999, 7.937329156199999, 8.475020422600002], [9.851019822800001, 7.4419965787999995, 8.4934967744], [8.81985, 8.952224999999999, 3.551246710400001], [8.81985, 8.952224999999999, 6.462337259000002], [8.81985, 8.952224999999999, 10.5129467104], [8.81985, 8.952224999999999, 13.424037259000002], [1.4220067758000001, 1.9691791562, 5.448379577400002], [3.9711198228, 1.4738465788, 5.429903225600001], [4.4578932242, 3.9989708437999996, 5.448379577400002], [1.9087801772000001, 4.4943034212, 5.429903225600001], [1.4220067758000001, 3.9989708437999996, 12.4100795774], [3.9711198228, 4.4943034212, 12.391603225600003], [4.4578932242, 1.9691791562, 12.4100795774], [1.9087801772000001, 1.4738465788, 12.391603225600003], [2.93995, 2.984075, 3.4104532896], [2.93995, 2.984075, 0.49936274100000005], [2.93995, 2.984075, 10.372153289600003], [2.93995, 2.984075, 7.461062741000001], [8.81985, 8.952224999999999, 1.5279956862], [8.81985, 8.952224999999999, 8.489695686200001], [2.93995, 2.984075, 5.433704313800002], [2.93995, 2.984075, 12.395404313800002]], 'spacegroup_kinds': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 
89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139], 'spacegroup': {'number': 1, 'setting': 1}, 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': {'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, '92': {'C': 1.0}, 
'93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': {'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, '138': {'Fe': 1.0}, '139': {'Fe': 1.0}}, 'MOF_name': 'EWIKAX03', 'Dos at Fermi energy': 29.8688, 'Band_gap': 0.004466204, 'Dos at VBM': 29.8688, 'Dos at CBM': 29.8688, 'HSE band gap': 'not found', 'LCD': '4.36078', 'PLD': '1.82778', 'Density': '1.696', 'Accessible Surface Area': '0', 'Volume Fraction': '0.51294', 'Criteria#': 2, 'Multiplier_Sum': '1', 'Space_group#': '14', 'Space_group': 'P21/c', 'Temp': '94', 'Zprime': '1', 'Year': '2008', 'Metal': 'Fe', 'Metals number': '1', 'Cell volume': '1954.407', 'Metal density': '0.000511664', 'Crit: metal': 'yes', 'Crit: redox match': 'yes', 'Crit: redox active linker': 'no', 'Crit: pi-pi stacking': 'no', 'without ions': 'no', '2D': 'no', '1aromatico-up': 'yes', '2aromatici-up': 'no', 'N3--NCN up': 'no', 'benzene': 'no', '6m-rings': 'yes', 'pyridine': 'no', 'pyrimidine': 'no', 'metal-S': 'no', 'metal-O': 'no', 'metal-N': 'yes', 'metal-halogen': 'no', '5m-ring-leg2met': 'no', '5-m-rings': 'no', 'CN-M': 'yes', 'M-h2o': 'no', 'M-H2O-M': 'no', 'M-N-NM-N-M': 'no', 'M-C-C-TRIANG': 'no', 'COOM': 'no', 'units': {'Dos at Fermi energy': 'eln/cell', 'Dos at VBM': 'eln/cell', 'Dos at CBM': 'eln/cell', 
'Density': 'g/cm3', 'Accessible Surface Area': 'm2/g'}, 'volume': 1954.406783203316, 'elements': {'1': 64, '6': 48, '7': 24, '26': 4}, 'username': 'ubuntu', 'uploaded': '2023-08-30T00:52:40.560182', 'modified': '2023-08-30T00:52:40.560190', 'hash_structure': 'f6bbc3390f53917f801298dc22827262', 'hash': '0cd93d1155d74a60380a615249629708', 'derived': {'arrays_keys': ['positions', 'numbers', 'spacegroup_kinds'], 'info_keys': ['unit_cell', 'n_atoms', 'occupancy', 'spacegroup', 'cell', 'formula', 'pbc', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n", + "dict_keys(['_id', 'n_atoms', 'cell', 'pbc', 'formula', 'numbers', 'positions', 'spacegroup_kinds', 'spacegroup', 'unit_cell', 'occupancy', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 
'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume', 'elements', 'username', 'uploaded', 'modified', 'hash_structure', 'hash', 'derived'])\n" + ] + } + ], + "source": [ + "query = 'MOF_name: *'\n", + "print(len(list(abcd.get_items(query))))\n", + "print(list(abcd.get_items(query))[0])\n", + "print(list(abcd.get_items(query))[0].keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "query = 'MOF_name: *'\n", + "abcd.add_property(\n", + " data={\"example_property\": \"example_value\"},\n", + " query=query\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['example_value']" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.property(\"example_property\", query)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.rename_property(\n", + " name=\"example_property\",\n", + " new_name=\"renamed_property\",\n", + " query=query\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['example_value']" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.property(\"renamed_property\", query)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.delete_property(\n", + " name=\"renamed_property\",\n", + " query=query\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['example_value']" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 
abcd.property(\"example_property\", query)\n", + "abcd.property(\"renamed_property\", query)" + ] + } + ], + "metadata": { + "@webio": { + "lastCommId": null, + "lastKernelId": null + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/abcd_opensearch_uploading.ipynb b/tutorials/abcd_opensearch_uploading.ipynb index 661761b0..ea7fbbce 100644 --- a/tutorials/abcd_opensearch_uploading.ipynb +++ b/tutorials/abcd_opensearch_uploading.ipynb @@ -216,7 +216,7 @@ "pycharm": {} }, "source": [ - "Uploading configurations on-by-one directly from an ase atoms object:" + "Uploading configurations one-by-one directly from an ase atoms object:" ] }, { diff --git a/tutorials/abcd_uploading.ipynb b/tutorials/abcd_uploading.ipynb index 0b1640c4..9d9c9029 100644 --- a/tutorials/abcd_uploading.ipynb +++ b/tutorials/abcd_uploading.ipynb @@ -201,7 +201,7 @@ "pycharm": {} }, "source": [ - "Uploading configurations on-by-one directly from an ase atoms object:" + "Uploading configurations one-by-one directly from an ase atoms object:" ] }, { From c5d3ad567aac183f25e8d66f67aa769b14003e5f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 31 Aug 2023 11:21:10 +0000 Subject: [PATCH 033/112] Enable kwargs for OpenSearch client settings --- abcd/__init__.py | 2 +- abcd/backends/atoms_opensearch.py | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/abcd/__init__.py b/abcd/__init__.py index dd3bddda..2bb82cf9 100644 --- a/abcd/__init__.py +++ b/abcd/__init__.py @@ -58,7 +58,7 @@ def from_url(cls, url, **kwargs): from abcd.backends.atoms_opensearch import OpenSearchDatabase - return 
OpenSearchDatabase(db_name=db, **conn_settings, **kwargs) + return OpenSearchDatabase(db=db, **conn_settings, **kwargs) if r.scheme == "http" or r.scheme == "https": raise NotImplementedError("http not yet supported! soon...") if r.scheme == "ssh": diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index cc1934e2..f9677d70 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -280,14 +280,22 @@ def __init__( logger.info((host, port, index_name, username, password, kwargs)) + client_settings = { + "verify_certs": False, + "ca_certs": None, + "use_ssl": True, + "ssl_assert_hostname": False, + "ssl_show_warn": False, + } + + for key in client_settings: + if key in kwargs: + client_settings[key] = kwargs[key] + self.client = OpenSearch( hosts=[{"host": host, "port": port}], http_auth=(username, password), - verify_certs=False, - ca_certs=False, - use_ssl=True, - ssl_assert_hostname=False, - ssl_show_warn=False, + **client_settings, ) try: From 4520f5140bbf3860d649a93c1a565330d3d84238 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 31 Aug 2023 18:20:44 +0000 Subject: [PATCH 034/112] Fix property function if property not present --- abcd/backends/atoms_opensearch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index f9677d70..720a5ef4 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -563,6 +563,7 @@ def property(self, name, query: Union[dict, str, None] = None) -> list: stored_fields=format(name), _source=format(name), ) + if format(name) in hit["_source"] ] def count_property(self, name, query: Union[dict, str, None] = None) -> dict: From 13bfe7681630be7fd4cd8782e348682e8f2492e0 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 25 Aug 2023 17:42:53 +0000 Subject: [PATCH 035/112] Run OpenSearch unit tests with GitHub Actions --- .ci/opensearch/Dockerfile | 7 + 
.ci/opensearch/action.yml | 32 ++ .ci/opensearch/functions/imports.sh | 9 + .../functions/wait-for-container.sh | 42 +++ .ci/opensearch/run-opensearch.sh | 50 +++ .ci/opensearch/test.sh | 7 + .github/workflows/ci.yml | 19 + .github/workflows/continuous-integration.yml | 27 -- tests/database.py | 12 +- tests/opensearch.py | 347 ++++++++++++++++++ 10 files changed, 519 insertions(+), 33 deletions(-) create mode 100644 .ci/opensearch/Dockerfile create mode 100644 .ci/opensearch/action.yml create mode 100755 .ci/opensearch/functions/imports.sh create mode 100755 .ci/opensearch/functions/wait-for-container.sh create mode 100755 .ci/opensearch/run-opensearch.sh create mode 100755 .ci/opensearch/test.sh delete mode 100644 .github/workflows/continuous-integration.yml create mode 100644 tests/opensearch.py diff --git a/.ci/opensearch/Dockerfile b/.ci/opensearch/Dockerfile new file mode 100644 index 00000000..235c3b02 --- /dev/null +++ b/.ci/opensearch/Dockerfile @@ -0,0 +1,7 @@ +FROM docker:stable + +RUN apk add --update bash + +COPY run-opensearch.sh /run-opensearch.sh + +ENTRYPOINT ["/run-opensearch.sh"] diff --git a/.ci/opensearch/action.yml b/.ci/opensearch/action.yml new file mode 100644 index 00000000..bb516d35 --- /dev/null +++ b/.ci/opensearch/action.yml @@ -0,0 +1,32 @@ +name: 'Run OpenSearch' +description: 'This action spins up an OpenSearch instance that can be accessed and used in your subsequent steps.' 
+ +inputs: + opensearch-version: + description: 'The version of the OpenSearch you want to run' + required: true + security-enabled: + description: 'Enable or disable HTTPS, disabled by default' + default: 'false' + required: false + nodes: + description: 'Number of nodes in the cluster' + required: false + default: 1 + port: + description: 'Port where you want to run OpenSearch' + required: false + default: 9200 + opensearch_password: + description: 'The password for the user admin in your cluster' + required: false + default: 'admin' + +runs: + using: 'docker' + image: 'Dockerfile' + env: + OPENSEARCH_VERSION: ${{ inputs.opensearch-version }} + NODES: ${{ inputs.nodes }} + PORT: ${{ inputs.port }} + SECURITY_ENABLED: ${{ inputs.security-enabled }} diff --git a/.ci/opensearch/functions/imports.sh b/.ci/opensearch/functions/imports.sh new file mode 100755 index 00000000..a3ece964 --- /dev/null +++ b/.ci/opensearch/functions/imports.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# +# Sets up all the common variables and imports relevant functions +# +# Version 1.0.1 +# - Initial version after refactor +# From https://github.com/opensearch-project/opensearch-py/blob/main/.ci/functions/imports.sh + +source ./.ci/opensearch/functions/wait-for-container.sh diff --git a/.ci/opensearch/functions/wait-for-container.sh b/.ci/opensearch/functions/wait-for-container.sh new file mode 100755 index 00000000..64c42d4a --- /dev/null +++ b/.ci/opensearch/functions/wait-for-container.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# +# Exposes a routine scripts can call to wait for a container if that container set up a health command +# +# Please source .ci/opensearch/functions/imports.sh as a whole not just this file +# +# Version 1.0.1 +# - Initial version after refactor +# - Make sure wait_for_container is silent +# From https://github.com/opensearch-project/opensearch-py/blob/main/.ci/functions/wait-for-container.sh + +function container_running { + if [[ "$(docker ps -q -f name=$1)" ]]; then + 
return 0; + else return 1; + fi +} + +function wait_for_container { + set +x + until ! container_running "$1" || (container_running "$1" && [[ "$(docker inspect -f "{{.State.Health.Status}}" ${1})" != "starting" ]]); do + echo "" + docker inspect -f "{{range .State.Health.Log}}{{.Output}}{{end}}" ${1} + echo -e "\033[34;1mINFO:\033[0m waiting for node $1 to be up\033[0m" + sleep 2; + done; + + # Always show logs if the container is running, this is very useful both on CI as well as while developing + if container_running $1; then + docker logs $1 + fi + + if ! container_running $1 || [[ "$(docker inspect -f "{{.State.Health.Status}}" ${1})" != "healthy" ]]; then + echo -e "\033[31;1mERROR:\033[0m Failed to start $1 in detached mode beyond health checks\033[0m" + echo -e "\033[31;1mERROR:\033[0m dumped the docker log before shutting the node down\033[0m" + return 1 + else + echo + echo -e "\033[32;1mSUCCESS:\033[0m Detached and healthy: ${1}\033[0m" + return 0 + fi +} diff --git a/.ci/opensearch/run-opensearch.sh b/.ci/opensearch/run-opensearch.sh new file mode 100755 index 00000000..22adc630 --- /dev/null +++ b/.ci/opensearch/run-opensearch.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +source ./.ci/opensearch/functions/imports.sh +set -euxo pipefail + +if [[ -z $OPENSEARCH_VERSION ]]; then + echo -e "\033[31;1mERROR:\033[0m Required environment variable [OPENSEARCH_VERSION] not set\033[0m" + exit 1 +fi + +for (( node=1; node<=${NODES-1}; node++ )) +do + port=$((PORT + $node - 1)) + + if [[ "$SECURITY_ENABLED" == "true" ]]; then + healthcmd="curl -vvv -s --insecure -u admin:admin --fail https://localhost:$port/_cluster/health || exit 1" + security=($(cat <<-END + +END + )) + elif [[ "$SECURITY_ENABLED" == "false" ]]; then + healthcmd="curl -vvv -s --fail http://localhost:$port/_cluster/health || exit 1" + security=($(cat <<-END + --env plugins.security.disabled=true +END + )) + fi + + docker run \ + --rm \ + --detach \ + --name="os${node}" \ + --env 
"cluster.name=docker-opensearch" \ + --env "http.port=${port}" \ + --env discovery.type=single-node \ + --env bootstrap.memory_lock=true \ + --env "OPENSEARCH_JAVA_OPTS=-Xms4g -Xmx4g" \ + "${security[@]}" \ + --publish "${port}:${port}" \ + --ulimit nofile=65536:65536 \ + --ulimit memlock=-1:-1 \ + --health-cmd="$(echo $healthcmd)" \ + --health-interval=2s \ + --health-retries=20 \ + --health-timeout=2s \ + opensearchproject/opensearch:${OPENSEARCH_VERSION} + + if wait_for_container "os$node"; then + echo -e "\033[32;1mSUCCESS:\033[0m OpenSearch up and running\033[0m" + fi +done diff --git a/.ci/opensearch/test.sh b/.ci/opensearch/test.sh new file mode 100755 index 00000000..c4eb9fd4 --- /dev/null +++ b/.ci/opensearch/test.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +script_path=$(dirname $(realpath -s $0)) +source $script_path/functions/imports.sh +set -euxo pipefail + +echo $script_path/functions/imports.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f2e1020c..f1e1c3e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,10 +9,26 @@ jobs: strategy: matrix: python-version: [ "3.9", "3.10", "3.11", "3.12" ] + opensearch: ['1.0.1', '2.0.1', 'latest'] + security-enabled: ["true", "false"] steps: - uses: actions/checkout@v4 + - name: Configure sysctl limits + run: | + sudo swapoff -a + sudo sysctl -w vm.swappiness=1 + sudo sysctl -w fs.file-max=262144 + sudo sysctl -w vm.max_map_count=262144 + + - name: Start OpenSearch + uses: ./.ci/opensearch + with: + port: 9250 + opensearch-version: ${{ matrix.opensearch }} + security-enabled: ${{ matrix.security-enabled }} + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -29,6 +45,9 @@ jobs: - name: Run unit tests run: | poetry run pytest --cov=abcd --cov-report xml --cov-report term:skip-covered + env: + port: 9250 + security_enabled: ${{ matrix.security-enabled }} - name: Upload coverage reports to Codecov uses: 
codecov/codecov-action@v4.4.1 diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml deleted file mode 100644 index 4888b07f..00000000 --- a/.github/workflows/continuous-integration.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Python package - -on: [push, pull_request] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] - - steps: - - uses: actions/checkout@v3 - - name: Install poetry - run: pipx install poetry - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - cache: 'poetry' - - name: Install dependencies - run: | - poetry install --with dev - - name: Run unit tests - run: | - poetry run python -m unittest -v tests - diff --git a/tests/database.py b/tests/database.py index d850c76b..ee4e2935 100644 --- a/tests/database.py +++ b/tests/database.py @@ -113,8 +113,8 @@ def test_push(self): None, self.abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], ).to_ase() - assert atoms_1 == result - assert atoms_2 != result + self.assertEqual(atoms_1, result) + self.assertNotEqual(atoms_2, result) def test_bulk(self): from io import StringIO @@ -149,7 +149,7 @@ def test_bulk(self): atoms_list.append(atoms_1) atoms_list.append(atoms_2) self.abcd.push(atoms_list) - assert self.abcd.count() == 2 + self.assertEqual(self.abcd.count(), 2) result_1 = AtomsModel( None, @@ -161,8 +161,8 @@ def test_bulk(self): None, self.abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], ).to_ase() - assert atoms_1 == result_1 - assert atoms_2 == result_2 + self.assertEqual(atoms_1, result_1) + self.assertEqual(atoms_2, result_2) def test_count(self): from io import StringIO @@ -184,7 +184,7 @@ def test_count(self): atoms.set_cell([1, 1, 1]) self.abcd.push(atoms) self.abcd.push(atoms) - assert self.abcd.count() == 2 + self.assertEqual(self.abcd.count(), 2) if __name__ == "__main__": diff --git a/tests/opensearch.py 
b/tests/opensearch.py new file mode 100644 index 00000000..c2ffbf61 --- /dev/null +++ b/tests/opensearch.py @@ -0,0 +1,347 @@ +import unittest +from abcd import ABCD +import logging + +class OpenSearch(unittest.TestCase): + @classmethod + def setUpClass(cls): + + import os + if os.getenv("GITHUB_ACTIONS") != "true": + raise unittest.SkipTest("Only runs via GitHub Actions") + + cls.security_enabled = os.getenv("security_enabled") == "true" + cls.port = int(os.environ["port"]) + cls.host = "localhost" + + from abcd.backends.atoms_opensearch import OpenSearchDatabase + + logging.basicConfig(level=logging.INFO) + url = f"opensearch://admin:admin@{cls.host}:{cls.port}" + abcd = ABCD.from_url( + url, + index_name="test_index", + analyse_schema=False, + use_ssl=cls.security_enabled + ) + assert isinstance(abcd, OpenSearchDatabase) + cls.abcd = abcd + + @classmethod + def tearDownClass(cls): + cls.abcd.destroy() + + def test_info(self): + self.abcd.destroy() + self.abcd.create() + self.abcd.client.indices.refresh(index="test_index") + self.abcd.print_info() + + info = { + "host": self.host, + "port": self.port, + "db": "abcd", + "index": "test_index", + "number of confs": 0, + "type": "opensearch", + } + self.assertEqual(self.abcd.info(), info) + + def test_destroy(self): + self.abcd.create() + self.abcd.client.indices.refresh(index="test_index") + self.assertTrue(self.abcd.client.indices.exists("test_index")) + + self.abcd.destroy() + self.assertFalse(self.abcd.client.indices.exists("test_index")) + return + + def test_delete(self): + self.abcd.create() + self.abcd.client.indices.refresh(index="test_index") + self.assertTrue(self.abcd.client.indices.exists("test_index")) + + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, 
format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + self.abcd.push(atoms) + self.abcd.push(atoms) + + self.abcd.client.indices.refresh(index="test_index") + self.assertEqual(self.abcd.count(), 2) + + self.abcd.delete() + self.assertTrue(self.abcd.client.indices.exists("test_index")) + self.abcd.client.indices.refresh(index="test_index") + self.assertEqual(self.abcd.count(), 0) + return + + def test_create(self): + self.abcd.destroy() + self.abcd.create() + + self.abcd.client.indices.refresh(index="test_index") + self.assertTrue(self.abcd.client.indices.exists("test_index")) + self.assertFalse(self.abcd.client.indices.exists("fake_index")) + + def test_push(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + from abcd.backends.atoms_opensearch import AtomsModel + + self.abcd.destroy() + self.abcd.create() + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + self.abcd.push(atoms_1) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + W 0.00000000 0.00000000 0.00000000 + W 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + + self.abcd.client.indices.refresh(index="test_index") + result = AtomsModel( + None, + None, + self.abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + ).to_ase() + self.assertEqual(atoms_1, result) + self.assertNotEqual(atoms_2, result) + + def test_bulk(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + from abcd.backends.atoms_opensearch import AtomsModel + + self.abcd.destroy() + self.abcd.create() + xyz_1 = StringIO( + """2 + 
Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + + xyz_2 = StringIO( + """1 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + + atoms_list = [] + atoms_list.append(atoms_1) + atoms_list.append(atoms_2) + self.abcd.push(atoms_list) + + self.abcd.client.indices.refresh(index="test_index") + self.assertEqual(self.abcd.count(), 2) + result_1 = AtomsModel( + None, + None, + self.abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + ).to_ase() + result_2 = AtomsModel( + None, + None, + self.abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], + ).to_ase() + self.assertEqual(atoms_1, result_1) + self.assertEqual(atoms_2, result_2) + + def test_count(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + + self.abcd.destroy() + self.abcd.create() + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + self.abcd.push(atoms) + self.abcd.push(atoms) + + self.abcd.client.indices.refresh(index="test_index") + self.assertEqual(self.abcd.count(), 2) + + def test_property(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + + self.abcd.destroy() + self.abcd.create() + + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" test_prop_1="test_prop_1" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + 
""" + ) + + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + self.abcd.push(atoms_1) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" test_prop_2="test_2" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + self.abcd.push(atoms_2) + + self.abcd.client.indices.refresh(index="test_index") + prop = self.abcd.property('test_prop_1') + expected_prop = ['test_prop_1'] + self.assertEqual(prop, expected_prop) + + def test_properties(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + + self.abcd.destroy() + self.abcd.create() + + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + self.abcd.push(atoms) + + self.abcd.client.indices.refresh(index="test_index") + props = self.abcd.properties() + expected_props = { + 'info': ['_vtk_test', 'cell', 'formula', 'n_atoms', 'pbc', 's', 'volume'], + 'derived': [ + 'elements', + 'hash', + 'hash_structure', + 'modified', + 'uploaded', + 'username', + 'volume' + ], + 'arrays': ['numbers', 'positions'] + } + self.assertEqual(props, expected_props) + + def test_count_properties(self): + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + + self.abcd.destroy() + self.abcd.create() + + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" test_prop_1="test_1" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + self.abcd.push(atoms_1) + + 
xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" test_prop_2="test_2" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + self.abcd.push(atoms_2) + + self.abcd.client.indices.refresh(index="test_index") + props = self.abcd.count_properties() + expected_counts = { + 'test_prop_1': {'count': 1, 'category': 'info', 'dtype': 'scalar(str)'}, + 'test_prop_2': {'count': 1, 'category': 'info', 'dtype': 'scalar(str)'}, + 'cell': {'count': 2, 'category': 'info', 'dtype': 'array(float)'}, + 'elements': {'count': 2, 'category': 'derived', 'dtype': 'scalar(dict)'}, + 'formula': {'count': 2, 'category': 'info', 'dtype': 'scalar(str)'}, + 'hash': {'count': 2, 'category': 'derived', 'dtype': 'scalar(str)'}, + 'hash_structure': {'count': 2, 'category': 'derived', 'dtype': 'scalar(str)'}, + 'modified': {'count': 2, 'category': 'derived', 'dtype': 'scalar(str)'}, + 'n_atoms': {'count': 2, 'category': 'info', 'dtype': 'scalar(int)'}, + 'numbers': {'count': 2, 'category': 'arrays', 'dtype': 'vector(int, N)'}, + 'pbc': {'count': 2, 'category': 'info', 'dtype': 'vector(bool)'}, + 'positions': {'count': 2, 'category': 'arrays', 'dtype': 'array(float, N x 3)'}, + 's': {'count': 2, 'category': 'info', 'dtype': 'scalar(str)'}, + 'uploaded': {'count': 2, 'category': 'derived', 'dtype': 'scalar(str)'}, + 'username': {'count': 2, 'category': 'derived', 'dtype': 'scalar(str)'}, + 'volume': {'count': 2, 'category': 'derived', 'dtype': 'scalar(float)'} + } + + self.assertEqual(props, expected_counts) + +if __name__ == "__main__": + unittest.main(verbosity=1, exit=False) From f5e290220cc6ce8d6d99faf34a2d81e6e43b00b0 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 1 Sep 2023 10:59:08 +0000 Subject: [PATCH 036/112] Add refresh function for OpenSearch --- abcd/backends/atoms_opensearch.py | 6 ++++++ tests/opensearch.py | 
24 ++++++++++++------------ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 720a5ef4..5263681c 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -368,6 +368,12 @@ def create(self): """ self.client.indices.create(index=self.index_name, ignore=400) + def refresh(self): + """ + Refresh index to ensure recent operations performed are available for search. + """ + self.client.indices.refresh(index=self.index_name) + def save_bulk(self, actions: Iterable): """ Save a collection of documents in bulk. diff --git a/tests/opensearch.py b/tests/opensearch.py index c2ffbf61..db74ffe3 100644 --- a/tests/opensearch.py +++ b/tests/opensearch.py @@ -34,7 +34,7 @@ def tearDownClass(cls): def test_info(self): self.abcd.destroy() self.abcd.create() - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() self.abcd.print_info() info = { @@ -49,7 +49,7 @@ def test_info(self): def test_destroy(self): self.abcd.create() - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() self.assertTrue(self.abcd.client.indices.exists("test_index")) self.abcd.destroy() @@ -58,7 +58,7 @@ def test_destroy(self): def test_delete(self): self.abcd.create() - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() self.assertTrue(self.abcd.client.indices.exists("test_index")) from io import StringIO @@ -79,12 +79,12 @@ def test_delete(self): self.abcd.push(atoms) self.abcd.push(atoms) - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() self.assertEqual(self.abcd.count(), 2) self.abcd.delete() self.assertTrue(self.abcd.client.indices.exists("test_index")) - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() self.assertEqual(self.abcd.count(), 0) return @@ -92,7 +92,7 @@ def test_create(self): self.abcd.destroy() self.abcd.create() - 
self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() self.assertTrue(self.abcd.client.indices.exists("test_index")) self.assertFalse(self.abcd.client.indices.exists("fake_index")) @@ -127,7 +127,7 @@ def test_push(self): assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() result = AtomsModel( None, None, @@ -170,7 +170,7 @@ def test_bulk(self): atoms_list.append(atoms_2) self.abcd.push(atoms_list) - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() self.assertEqual(self.abcd.count(), 2) result_1 = AtomsModel( None, @@ -206,7 +206,7 @@ def test_count(self): self.abcd.push(atoms) self.abcd.push(atoms) - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() self.assertEqual(self.abcd.count(), 2) def test_property(self): @@ -243,7 +243,7 @@ def test_property(self): atoms_2.set_cell([1, 1, 1]) self.abcd.push(atoms_2) - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() prop = self.abcd.property('test_prop_1') expected_prop = ['test_prop_1'] self.assertEqual(prop, expected_prop) @@ -269,7 +269,7 @@ def test_properties(self): atoms.set_cell([1, 1, 1]) self.abcd.push(atoms) - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() props = self.abcd.properties() expected_props = { 'info': ['_vtk_test', 'cell', 'formula', 'n_atoms', 'pbc', 's', 'volume'], @@ -320,7 +320,7 @@ def test_count_properties(self): atoms_2.set_cell([1, 1, 1]) self.abcd.push(atoms_2) - self.abcd.client.indices.refresh(index="test_index") + self.abcd.refresh() props = self.abcd.count_properties() expected_counts = { 'test_prop_1': {'count': 1, 'category': 'info', 'dtype': 'scalar(str)'}, From 83c262d97db8f6236bb16132393c3c94a18fbbf9 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 1 Sep 2023 12:41:32 +0000 Subject: [PATCH 037/112] Update unit tests --- tests/database.py | 25 +++ tests/opensearch.py | 
444 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 372 insertions(+), 97 deletions(-) diff --git a/tests/database.py b/tests/database.py index ee4e2935..92e9fb92 100644 --- a/tests/database.py +++ b/tests/database.py @@ -50,9 +50,16 @@ def test_push(self): class OpenSearch(unittest.TestCase): + """ + Testing mock OpenSearch database functions. + """ + @classmethod @openmock def setUpClass(cls): + """ + Set up database connection. + """ from abcd.backends.atoms_opensearch import OpenSearchDatabase logging.basicConfig(level=logging.INFO) @@ -63,21 +70,33 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): + """ + Delete index from database. + """ cls.abcd.destroy() def test_destroy(self): + """ + Test destroying database index. + """ self.assertTrue(self.abcd.client.indices.exists("test_index")) self.abcd.destroy() self.assertFalse(self.abcd.client.indices.exists("test_index")) return def test_create(self): + """ + Test creating database index. + """ self.abcd.destroy() self.abcd.create() self.assertTrue(self.abcd.client.indices.exists("test_index")) self.assertFalse(self.abcd.client.indices.exists("fake_index")) def test_push(self): + """ + Test pushing atoms objects to database individually. + """ from io import StringIO from ase.io import read from ase.atoms import Atoms @@ -117,6 +136,9 @@ def test_push(self): self.assertNotEqual(atoms_2, result) def test_bulk(self): + """ + Test pushing atoms object to database together. + """ from io import StringIO from ase.io import read from ase.atoms import Atoms @@ -165,6 +187,9 @@ def test_bulk(self): self.assertEqual(atoms_2, result_2) def test_count(self): + """ + Test counting the number of documents in the database. 
+ """ from io import StringIO from ase.io import read from ase.atoms import Atoms diff --git a/tests/opensearch.py b/tests/opensearch.py index db74ffe3..9106f448 100644 --- a/tests/opensearch.py +++ b/tests/opensearch.py @@ -2,14 +2,21 @@ from abcd import ABCD import logging + class OpenSearch(unittest.TestCase): + """ + Testing live OpenSearch database functions. + """ + @classmethod def setUpClass(cls): - + """ + Set up database connection. + """ import os + if os.getenv("GITHUB_ACTIONS") != "true": raise unittest.SkipTest("Only runs via GitHub Actions") - cls.security_enabled = os.getenv("security_enabled") == "true" cls.port = int(os.environ["port"]) cls.host = "localhost" @@ -17,21 +24,50 @@ def setUpClass(cls): from abcd.backends.atoms_opensearch import OpenSearchDatabase logging.basicConfig(level=logging.INFO) + url = f"opensearch://admin:admin@{cls.host}:{cls.port}" abcd = ABCD.from_url( url, index_name="test_index", analyse_schema=False, - use_ssl=cls.security_enabled + use_ssl=cls.security_enabled, ) assert isinstance(abcd, OpenSearchDatabase) cls.abcd = abcd @classmethod def tearDownClass(cls): + """ + Delete index from database. + """ cls.abcd.destroy() + def push_data(self): + """ + Uploads an example xyz file to the database. + """ + from io import StringIO + from ase.io import read + from ase.atoms import Atoms + + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + self.abcd.push(atoms) + self.abcd.refresh() + def test_info(self): + """ + Test printing database info. + """ self.abcd.destroy() self.abcd.create() self.abcd.refresh() @@ -48,6 +84,10 @@ def test_info(self): self.assertEqual(self.abcd.info(), info) def test_destroy(self): + """ + Test destroying database index. 
+ """ + self.abcd.destroy() self.abcd.create() self.abcd.refresh() self.assertTrue(self.abcd.client.indices.exists("test_index")) @@ -55,48 +95,21 @@ def test_destroy(self): self.abcd.destroy() self.assertFalse(self.abcd.client.indices.exists("test_index")) return - - def test_delete(self): - self.abcd.create() - self.abcd.refresh() - self.assertTrue(self.abcd.client.indices.exists("test_index")) - - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - - xyz = StringIO( - """2 - Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" - Si 0.00000000 0.00000000 0.00000000 - Si 0.00000000 0.00000000 0.00000000 - """ - ) - - atoms = read(xyz, format="extxyz") - assert isinstance(atoms, Atoms) - atoms.set_cell([1, 1, 1]) - self.abcd.push(atoms) - self.abcd.push(atoms) - - self.abcd.refresh() - self.assertEqual(self.abcd.count(), 2) - - self.abcd.delete() - self.assertTrue(self.abcd.client.indices.exists("test_index")) - self.abcd.refresh() - self.assertEqual(self.abcd.count(), 0) - return def test_create(self): + """ + Test creating database index. + """ self.abcd.destroy() self.abcd.create() - self.abcd.refresh() self.assertTrue(self.abcd.client.indices.exists("test_index")) self.assertFalse(self.abcd.client.indices.exists("fake_index")) def test_push(self): + """ + Test pushing atoms objects to database individually. + """ from io import StringIO from ase.io import read from ase.atoms import Atoms @@ -136,7 +149,24 @@ def test_push(self): self.assertEqual(atoms_1, result) self.assertNotEqual(atoms_2, result) + def test_delete(self): + """ + Test deleting all documents from database. + """ + self.push_data() + self.push_data() + + self.assertEqual(self.abcd.count(), 2) + self.abcd.delete() + self.assertTrue(self.abcd.client.indices.exists("test_index")) + self.abcd.refresh() + self.assertEqual(self.abcd.count(), 0) + return + def test_bulk(self): + """ + Test pushing atoms object to database together. 
+ """ from io import StringIO from ase.io import read from ase.atoms import Atoms @@ -186,30 +216,84 @@ def test_bulk(self): self.assertEqual(atoms_2, result_2) def test_count(self): + """ + Test counting the number of documents in the database. + """ + self.abcd.destroy() + self.abcd.create() + self.push_data() + self.push_data() + self.assertEqual(self.abcd.count(), 2) + + def test_property(self): + """ + Test getting values of a property from the database. + """ from io import StringIO from ase.io import read from ase.atoms import Atoms self.abcd.destroy() self.abcd.create() - xyz = StringIO( + + xyz_1 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" prop_1="test_1" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ ) - atoms = read(xyz, format="extxyz") - assert isinstance(atoms, Atoms) - atoms.set_cell([1, 1, 1]) - self.abcd.push(atoms) - self.abcd.push(atoms) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + self.abcd.push(atoms_1) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" prop_2="test_2" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + self.abcd.push(atoms_2) self.abcd.refresh() - self.assertEqual(self.abcd.count(), 2) + prop = self.abcd.property("prop_1") + expected_prop = ["test_1"] + self.assertEqual(prop, expected_prop) - def test_property(self): + def test_properties(self): + """ + Test getting all properties from the database. 
+ """ + self.abcd.destroy() + self.abcd.create() + self.push_data() + props = self.abcd.properties() + expected_props = { + "info": ["_vtk_test", "cell", "formula", "n_atoms", "pbc", "s", "volume"], + "derived": [ + "elements", + "hash", + "hash_structure", + "modified", + "uploaded", + "username", + "volume", + ], + "arrays": ["numbers", "positions"], + } + self.assertEqual(props, expected_props) + + def test_count_property(self): + """ + Test counting values of specified properties from the database. + """ from io import StringIO from ase.io import read from ase.atoms import Atoms @@ -219,7 +303,7 @@ def test_property(self): xyz_1 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" test_prop_1="test_prop_1" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" prop_1="1" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ @@ -231,9 +315,8 @@ def test_property(self): self.abcd.push(atoms_1) xyz_2 = StringIO( - """2 - Properties=species:S:1:pos:R:3 s="sadf" test_prop_2="test_2" pbc="F F F" - Si 0.00000000 0.00000000 0.00000000 + """1 + Properties=species:S:1:pos:R:3 s="sadf" prop_2="2" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 """ ) @@ -244,11 +327,14 @@ def test_property(self): self.abcd.push(atoms_2) self.abcd.refresh() - prop = self.abcd.property('test_prop_1') - expected_prop = ['test_prop_1'] - self.assertEqual(prop, expected_prop) + self.assertEqual(self.abcd.count_property("prop_1"), {1: 1}) + self.assertEqual(self.abcd.count_property("n_atoms"), {1: 1, 2: 1}) + self.assertEqual(self.abcd.count_property("volume"), {1.0: 2}) - def test_properties(self): + def test_count_properties(self): + """ + Test counting appearences of each property in documents in the database. 
+ """ from io import StringIO from ase.io import read from ase.atoms import Atoms @@ -256,37 +342,213 @@ def test_properties(self): self.abcd.destroy() self.abcd.create() - xyz = StringIO( + xyz_1 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" prop_1="test_1" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ ) - atoms = read(xyz, format="extxyz") - assert isinstance(atoms, Atoms) - atoms.set_cell([1, 1, 1]) - self.abcd.push(atoms) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + self.abcd.push(atoms_1) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" prop_2="test_2" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + self.abcd.push(atoms_2) self.abcd.refresh() - props = self.abcd.properties() - expected_props = { - 'info': ['_vtk_test', 'cell', 'formula', 'n_atoms', 'pbc', 's', 'volume'], - 'derived': [ - 'elements', - 'hash', - 'hash_structure', - 'modified', - 'uploaded', - 'username', - 'volume' - ], - 'arrays': ['numbers', 'positions'] + props = self.abcd.count_properties() + expected_counts = { + "prop_1": {"count": 1, "category": "info", "dtype": "scalar(str)"}, + "prop_2": {"count": 1, "category": "info", "dtype": "scalar(str)"}, + "cell": {"count": 2, "category": "info", "dtype": "array(float)"}, + "elements": {"count": 2, "category": "derived", "dtype": "scalar(dict)"}, + "formula": {"count": 2, "category": "info", "dtype": "scalar(str)"}, + "hash": {"count": 2, "category": "derived", "dtype": "scalar(str)"}, + "hash_structure": { + "count": 2, + "category": "derived", + "dtype": "scalar(str)", + }, + "modified": {"count": 2, "category": "derived", "dtype": "scalar(str)"}, + "n_atoms": {"count": 2, 
"category": "info", "dtype": "scalar(int)"}, + "numbers": {"count": 2, "category": "arrays", "dtype": "vector(int, N)"}, + "pbc": {"count": 2, "category": "info", "dtype": "vector(bool)"}, + "positions": { + "count": 2, + "category": "arrays", + "dtype": "array(float, N x 3)", + }, + "s": {"count": 2, "category": "info", "dtype": "scalar(str)"}, + "uploaded": {"count": 2, "category": "derived", "dtype": "scalar(str)"}, + "username": {"count": 2, "category": "derived", "dtype": "scalar(str)"}, + "volume": {"count": 2, "category": "derived", "dtype": "scalar(float)"}, } - self.assertEqual(props, expected_props) - def test_count_properties(self): + self.assertEqual(props, expected_counts) + + def test_add_property(self): + """ + Test adding a property to documents in the database. + """ + self.abcd.destroy() + self.abcd.create() + self.push_data() + self.abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) + + self.abcd.refresh() + data = self.abcd.client.search(index="test_index") + self.assertEqual( + data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"], "TEST_VALUE" + ) + self.assertIn( + "TEST_PROPERTY", data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] + ) + + def test_rename_property(self): + """ + Test renaming a property for documents in the database. + """ + self.abcd.destroy() + self.abcd.create() + self.push_data() + self.abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) + self.abcd.refresh() + self.abcd.rename_property("TEST_PROPERTY", "NEW_PROPERTY") + self.abcd.refresh() + + data = self.abcd.client.search(index="test_index") + self.assertEqual( + data["hits"]["hits"][0]["_source"]["NEW_PROPERTY"], "TEST_VALUE" + ) + + def test_delete_property(self): + """ + Test deleting a property from documents in the database. 
+ """ + self.abcd.destroy() + self.abcd.create() + self.push_data() + + self.abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) + self.abcd.refresh() + data = self.abcd.client.search(index="test_index") + self.assertEqual( + data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"], "TEST_VALUE" + ) + + self.abcd.delete_property("TEST_PROPERTY") + self.abcd.refresh() + data = self.abcd.client.search(index="test_index") + with self.assertRaises(KeyError): + data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"] + self.assertNotIn( + "TEST_PROPERTY", data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] + ) + + def test_get_items(self): + """ + Test getting a dictionary of values from documents in the database. + """ + self.abcd.destroy() + self.abcd.create() + self.push_data() + + expected_items = { + "_id": None, + "n_atoms": 2, + "numbers": [14, 14], + "_vtk_test": "t _ e s t", + "positions": [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + "cell": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + "pbc": [False, False, False], + "volume": 1.0, + "hash_structure": None, + "s": "sadf", + "elements": {"14": 2}, + "uploaded": None, + "formula": "Si2", + "modified": None, + "derived": { + "info_keys": [ + "s", + "n_atoms", + "_vtk_test", + "cell", + "pbc", + "formula", + "volume", + ], + "derived_keys": [ + "elements", + "username", + "uploaded", + "modified", + "volume", + "hash_structure", + "hash", + ], + "arrays_keys": ["numbers", "positions"], + "results_keys": [], + }, + "hash": None, + "username": None, + } + + self.abcd.refresh() + items = list(self.abcd.get_items())[0] + + for key in expected_items: + if key not in [ + "_id", + "hash_structure", + "uploaded", + "modified", + "hash", + "username", + ]: + if isinstance(expected_items[key], dict): + for dict_key in expected_items[key]: + if isinstance(expected_items[key][dict_key], list): + self.assertEqual( + set(expected_items[key][dict_key]), + set(items[key][dict_key]), + ) + else: + self.assertEqual( + 
expected_items[key][dict_key], items[key][dict_key] + ) + else: + self.assertEqual(expected_items[key], items[key]) + + def test_get_atoms(self): + """ + Test getting values from documents in the database as Atoms objects. + """ + from ase.atoms import Atoms + + self.abcd.destroy() + self.abcd.create() + self.push_data() + expected_atoms = Atoms(symbols="Si2", pbc=False, cell=[1.0, 1.0, 1.0]) + self.assertEqual(expected_atoms, list(self.abcd.get_atoms())[0]) + + def test_query(self): + """ + Test querying documents in the database. + """ from io import StringIO from ase.io import read from ase.atoms import Atoms @@ -296,7 +558,7 @@ def test_count_properties(self): xyz_1 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" test_prop_1="test_1" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" prop_1="test_1" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ @@ -309,7 +571,7 @@ def test_count_properties(self): xyz_2 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" test_prop_2="test_2" pbc="F F F" + Properties=species:S:1:pos:R:3 s="sadf" prop_2="test_2" pbc="F F F" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ @@ -319,29 +581,17 @@ def test_count_properties(self): assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) self.abcd.push(atoms_2) - self.abcd.refresh() - props = self.abcd.count_properties() - expected_counts = { - 'test_prop_1': {'count': 1, 'category': 'info', 'dtype': 'scalar(str)'}, - 'test_prop_2': {'count': 1, 'category': 'info', 'dtype': 'scalar(str)'}, - 'cell': {'count': 2, 'category': 'info', 'dtype': 'array(float)'}, - 'elements': {'count': 2, 'category': 'derived', 'dtype': 'scalar(dict)'}, - 'formula': {'count': 2, 'category': 'info', 'dtype': 'scalar(str)'}, - 'hash': {'count': 2, 'category': 'derived', 'dtype': 'scalar(str)'}, - 'hash_structure': {'count': 2, 'category': 'derived', 'dtype': 'scalar(str)'}, - 'modified': {'count': 2, 'category': 
'derived', 'dtype': 'scalar(str)'}, - 'n_atoms': {'count': 2, 'category': 'info', 'dtype': 'scalar(int)'}, - 'numbers': {'count': 2, 'category': 'arrays', 'dtype': 'vector(int, N)'}, - 'pbc': {'count': 2, 'category': 'info', 'dtype': 'vector(bool)'}, - 'positions': {'count': 2, 'category': 'arrays', 'dtype': 'array(float, N x 3)'}, - 's': {'count': 2, 'category': 'info', 'dtype': 'scalar(str)'}, - 'uploaded': {'count': 2, 'category': 'derived', 'dtype': 'scalar(str)'}, - 'username': {'count': 2, 'category': 'derived', 'dtype': 'scalar(str)'}, - 'volume': {'count': 2, 'category': 'derived', 'dtype': 'scalar(float)'} - } - self.assertEqual(props, expected_counts) + query_dict = {"match": {"n_atoms": 2}} + query_all = "volume: [0 TO 10]" + query_1 = "prop_1: *" + query_2 = "prop_2: *" + self.assertEqual(self.abcd.count(query_dict), 2) + self.assertEqual(self.abcd.count(query_all), 2) + self.assertEqual(self.abcd.count(query_1), 1) + self.assertEqual(self.abcd.count(query_2), 1) + if __name__ == "__main__": unittest.main(verbosity=1, exit=False) From 533b096bc715b2db6da71c9629ddd4c189ed6272 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 1 Sep 2023 13:13:29 +0000 Subject: [PATCH 038/112] Fix delete property function --- abcd/backends/atoms_opensearch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 5263681c..f7f7bd00 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -857,7 +857,7 @@ def delete_property(self, name: str, query: Union[dict, str, None] = None): query = self.parser(query) script_txt = f"if (ctx._source.containsKey('{name}')) {{ " - script_txt += "ctx._source.remove('params.name');" + script_txt += "ctx._source.remove(params.name);" script_txt += "for (int i=0; i Date: Fri, 1 Sep 2023 13:26:01 +0000 Subject: [PATCH 039/112] Add tolerance for failed connections in testing --- .../functions/wait-for-container.sh | 2 +- 
.github/workflows/ci.yml | 1 + tests/opensearch.py | 27 +++++++++++++------ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/.ci/opensearch/functions/wait-for-container.sh b/.ci/opensearch/functions/wait-for-container.sh index 64c42d4a..c1b6d5e1 100755 --- a/.ci/opensearch/functions/wait-for-container.sh +++ b/.ci/opensearch/functions/wait-for-container.sh @@ -22,7 +22,7 @@ function wait_for_container { echo "" docker inspect -f "{{range .State.Health.Log}}{{.Output}}{{end}}" ${1} echo -e "\033[34;1mINFO:\033[0m waiting for node $1 to be up\033[0m" - sleep 2; + sleep 4; done; # Always show logs if the container is running, this is very useful both on CI as well as while developing diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1e1c3e3..46043a2d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,7 @@ jobs: tests: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: python-version: [ "3.9", "3.10", "3.11", "3.12" ] opensearch: ['1.0.1', '2.0.1', 'latest'] diff --git a/tests/opensearch.py b/tests/opensearch.py index 9106f448..05d3bd49 100644 --- a/tests/opensearch.py +++ b/tests/opensearch.py @@ -14,6 +14,9 @@ def setUpClass(cls): Set up database connection. 
""" import os + from time import sleep + from abcd.backends.atoms_opensearch import OpenSearchDatabase + from opensearchpy.exceptions import ConnectionError if os.getenv("GITHUB_ACTIONS") != "true": raise unittest.SkipTest("Only runs via GitHub Actions") @@ -21,17 +24,25 @@ def setUpClass(cls): cls.port = int(os.environ["port"]) cls.host = "localhost" - from abcd.backends.atoms_opensearch import OpenSearchDatabase - logging.basicConfig(level=logging.INFO) url = f"opensearch://admin:admin@{cls.host}:{cls.port}" - abcd = ABCD.from_url( - url, - index_name="test_index", - analyse_schema=False, - use_ssl=cls.security_enabled, - ) + try: + abcd = ABCD.from_url( + url, + index_name="test_index", + analyse_schema=False, + use_ssl=cls.security_enabled, + ) + except ConnectionError or ConnectionResetError: + sleep(10) + abcd = ABCD.from_url( + url, + index_name="test_index", + analyse_schema=False, + use_ssl=cls.security_enabled, + ) + assert isinstance(abcd, OpenSearchDatabase) cls.abcd = abcd From 39af132c6e42e7fd00752299ec3ccb72b9c5872a Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 1 Sep 2023 17:26:34 +0000 Subject: [PATCH 040/112] Update README --- README.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7fbdc673..e1fb3d3c 100644 --- a/README.md +++ b/README.md @@ -17,12 +17,53 @@ Main features: ## Installation +### General Setup + creating tables and views + ``` $ pip install git+https://github.com/libAtoms/abcd.git ``` -## Setup +Example Docker installation on Ubuntu: +``` +sudo apt-get update +sudo apt upgrade +sudo apt install docker.io +sudo groupadd docker +sudo usermod -aG docker $USER +newgrp docker # or exit and log in +``` + +Docker can be tested by running: + +``` +docker run hello-world +``` + +Example Python setup on Ubuntu (pip must be updated for poetry to be used successfully): + +``` +sudo apt install software-properties-common +sudo 
add-apt-repository ppa:deadsnakes/ppa +sudo apt install python3.10 +sudo apt-get install python3.10-distutils +sudo apt install python3-virtualenv +virtualenv -p /usr/bin/python3.10 venv_10 +source venv_10/bin/activate +curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 +``` + +Building and installing ABCD dependencies via poetry: + +``` +git clone https://github.com/libAtoms/abcd.git +curl -sSL https://install.python-poetry.org | python3 - +export PATH="/home/ubuntu/.local/bin:$PATH" +cd abcd +poetry install +poetry build +``` ### MongoDB @@ -55,11 +96,18 @@ The above login command will place create an `~/.abcd` file with the following c If you have an already running OpenSearch server, or install your own, then you are ready to go. Alternatively, ``` +sudo swapoff -a # optional +sudo sysctl -w vm.swappiness=1 # optional +sudo sysctl -w fs.file-max=262144 # optional sudo sysctl -w vm.max_map_count=262144 -docker run -d --rm --name abcd-opensearch -v :/data/db -p 9200:9200 -it opensearchproject/opensearch:latest +docker run -d --rm --name abcd-opensearch -v :/data/db -p 9200:9200 --env discovery.type=single-node -it opensearchproject/opensearch:latest ``` -will download and install a docker and run a database in it. +will download and install an OpenSearch image and run it. The connection can be tested with: + +``` +curl -vvv -s --insecure -u admin:admin --fail https://localhost:9200 +``` To connect to an OpenSearch database that is already running, use ``` @@ -113,3 +161,11 @@ docker stop visualiser-dev abcd-mongodb-net # stop the containers docker rm visualiser-dev abcd-mongodb-net # remove them if --rm did not docker network rm abcd-network # remove the docker network ``` + +## Testing + +Unit tests are automatically run on push and creation of pull requests. 
Unit testing using mock databases can also be run from the command line using: + +``` +python -m unittest tests +``` From d5f7ec10d79150c148c43740bd763d1a0d596311 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 1 Sep 2023 18:15:50 +0000 Subject: [PATCH 041/112] Update OpenSearch notebook --- tutorials/abcd_opensearch_properties.ipynb | 94 +++++++++++----------- 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/tutorials/abcd_opensearch_properties.ipynb b/tutorials/abcd_opensearch_properties.ipynb index 400e36f8..3642e137 100644 --- a/tutorials/abcd_opensearch_properties.ipynb +++ b/tutorials/abcd_opensearch_properties.ipynb @@ -94,7 +94,7 @@ " port: 9200\n", " db: abcd\n", " index: atoms\n", - "number of confs: 2056\n", + "number of confs: 0\n", " type: opensearch\n" ] } @@ -172,21 +172,6 @@ "cell_type": "code", "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [ - "from ase.io import iread, read" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "pycharm": { - "is_executing": false, - "metadata": false, - "name": "#%%\n" - } - }, "outputs": [ { "name": "stdout", @@ -197,12 +182,20 @@ } ], "source": [ + "from ase.io import iread, read\n", "!pwd" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data can be entered into the database as ASE Atoms objects, allowing any format readable by ase.io.read to be used." 
+ ] + }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -212,15 +205,15 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 14.7 s, sys: 215 ms, total: 14.9 s\n", - "Wall time: 22.3 s\n" + "CPU times: user 15 s, sys: 181 ms, total: 15.2 s\n", + "Wall time: 25 s\n" ] } ], @@ -235,12 +228,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Specify a csv file containing extra information, and a template for the structures corresponding to each row in the csv file." + "Extra information can be added manually via a dictionary, or read in through a csv file. A template for the structures corresponding to each row in the csv file, and units in the form of `field (unit)` or `field / unit`, can also be inferred." ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": { "pycharm": { "is_executing": false, @@ -257,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": { "pycharm": { "is_executing": false @@ -280,12 +273,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Upload each row, using the infered structure file:" + "Using the inferred structure file, this data can then be uploaded together. The `extra_info`, and properties in general, do not need to match those of existing stored documents." 
] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -302,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -315,7 +308,7 @@ " 'Accessible Surface Area': 'm2/g'}" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -326,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -350,7 +343,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -367,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": { "pycharm": {} }, @@ -386,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -404,7 +397,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -412,7 +405,7 @@ "output_type": "stream", "text": [ "1\n", - "{'_id': '30DtQ4oBtksDlC5rnd72', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 
1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 6.8505767864, 3.7879027402000007], [8.8195795246, 6.854754491399999, 6.227811509400001], [4.7265811346, 11.3359757278, 0.33047189900000007], [5.0450835578, 0.2940984957000009, 1.8464934612000004], [4.2515440136, 10.621206211199999, 1.9067817832000002], [11.252870301399998, 1.1909443325, 0.5637027724000001], [0.5665401247999999, 0.5403204921, 1.8410076416000005], [10.8263776348, 1.4189276624999998, 2.290538534], [8.815592952400001, 11.0538732136, 3.7879027402000007], [8.8201204754, 11.0496955086, 6.227811509400001], [1.1533188654, 11.3359757278, 7.2921718989999995], [0.8348164422, 0.2940984957000009, 8.8081934612], [1.6283559864000001, 10.621206211199999, 8.8684817832], [6.386829698600001, 1.1909443325, 7.5254027724000006], [5.3133598752, 0.5403204921, 8.802707641600001], [6.8133223651999995, 1.4189276624999998, 9.252238534000004], [8.8241070476, 11.0538732136, 10.7496027402], [8.8195795246, 11.0496955086, 13.189511509400003], [4.7265811346, 6.568474272199999, 7.2921718989999995], [5.0450835578, 5.6740515043, 8.8081934612], [4.2515440136, 7.2832437888, 8.8684817832], [11.252870301399998, 4.7772056675, 7.5254027724000006], [0.5665401247999999, 5.427829507899999, 8.802707641600001], [10.8263776348, 4.549222337500001, 9.252238534000004], [8.815592952400001, 6.8505767864, 10.7496027402], [8.8201204754, 6.854754491399999, 13.189511509400003], [10.606481134600001, 0.6003242722, 6.631228101000001], [10.924983557800001, 11.642213440599999, 5.1152065388], [10.1314440136, 1.3150937887999998, 5.0549182168], [5.3729703014, 10.745355667500002, 6.397997227600001], [6.4464401248000005, 11.3959795079, 5.120692358400001], [4.9464776348, 10.5173723375, 4.671161466], [2.9356929524, 0.8824267863999998, 3.1737972598], [2.9402204754, 0.8866044913999999, 0.7338884906000002], [7.033218865399999, 5.3678257278, 6.631228101000001], [6.714716442199999, 6.2622365594, 5.1152065388], [7.508255986400001, 
4.6530562112, 5.0549182168], [0.5069296986, 7.159094332499999, 6.397997227600001], [11.1932598752, 6.508470492099999, 5.120692358400001], [0.9334223652, 7.3870776624999985, 4.671161466], [2.9442070476, 5.0857232136, 3.1737972598], [2.9396795246000003, 5.0815455086, 0.7338884906000002], [10.606481134600001, 5.3678257278, 13.592928101000004], [10.924983557800001, 6.2622365594, 12.076906538800003], [10.1314440136, 4.6530562112, 12.016618216800003], [5.3729703014, 7.159094332499999, 13.3596972276], [6.4464401248000005, 6.508470492099999, 12.0823923584], [4.9464776348, 7.3870776624999985, 11.632861466], [2.9356929524, 5.0857232136, 10.135497259800003], [2.9402204754, 5.0815455086, 7.6955884906000005], [7.033218865399999, 0.6003242722, 13.592928101000004], [6.714716442199999, 11.642213440599999, 12.076906538800003], [7.508255986400001, 1.3150937887999998, 12.016618216800003], [0.5069296986, 10.745355667500002, 13.3596972276], [11.1932598752, 11.3959795079, 12.0823923584], [0.9334223652, 10.5173723375, 11.632861466], [2.9442070476, 0.8824267863999998, 10.135497259800003], [2.9396795246000003, 0.8866044913999999, 7.6955884906000005], [11.3421624628, 7.352343029499999, 1.4700882656000003], [0.8413313713999999, 6.690045487699999, 1.3900565624000003], [7.1397744132, 6.4757292212, 1.541181146], [6.3813143524, 5.266367177799999, 1.5607156762000003], [8.816380858999999, 7.8135974704, 4.3231043128], [8.8125824436, 7.814814973, 5.689045546400001], [6.297537537200001, 10.552106970499999, 1.4700882656000003], [5.0385686286, 11.2144045123, 1.3900565624000003], [10.499925586800002, 11.428720778799999, 1.541181146], [11.2583856476, 0.7017828222, 1.5607156762000003], [8.823319141, 10.0908525296, 4.3231043128], [8.827117556400001, 10.089635027, 5.689045546400001], [11.3421624628, 10.552106970499999, 8.4317882656], [0.8413313713999999, 11.2144045123, 8.3517565624], [7.1397744132, 11.428720778799999, 8.502881146], [6.3813143524, 0.7017828222, 8.522415676200001], [8.816380858999999, 
10.0908525296, 11.2848043128], [8.8125824436, 10.089635027, 12.6507455464], [6.297537537200001, 7.352343029499999, 8.4317882656], [5.0385686286, 6.690045487699999, 8.3517565624], [10.499925586800002, 6.4757292212, 8.502881146], [11.2583856476, 5.266367177799999, 8.522415676200001], [8.823319141, 7.8135974704, 11.2848043128], [8.827117556400001, 7.814814973, 12.6507455464], [0.41763753719999996, 1.3841930295, 5.491611734400001], [10.9184686286, 0.7218954876999999, 5.5716434376000015], [4.6200255868, 0.5075792212000001, 5.420518854], [5.378485647599999, 11.234517177799999, 5.400984323800001], [2.943419141, 1.8454474704, 2.6385956872000005], [2.9472175564, 1.8466649729999998, 1.2726544536000002], [5.4622624628, 4.583956970500001, 5.491611734400001], [6.7212313714, 5.246254512299999, 5.5716434376000015], [1.2598744132, 5.460570778799999, 5.420518854], [0.5014143524, 6.6699328222, 5.400984323800001], [2.936480859, 4.1227025296, 2.6385956872000005], [2.9326824436, 4.121485026999999, 1.2726544536000002], [0.41763753719999996, 4.583956970500001, 12.453311734400001], [10.9184686286, 5.246254512299999, 12.533343437600001], [4.6200255868, 5.460570778799999, 12.382218854000003], [5.378485647599999, 6.6699328222, 12.3626843238], [2.943419141, 4.1227025296, 9.600295687200003], [2.9472175564, 4.121485026999999, 8.234354453600004], [5.4622624628, 1.3841930295, 12.453311734400001], [6.7212313714, 0.7218954876999999, 12.533343437600001], [1.2598744132, 0.5075792212000001, 12.382218854000003], [0.5014143524, 11.234517177799999, 12.3626843238], [2.936480859, 1.8454474704, 9.600295687200003], [2.9326824436, 1.8466649729999998, 8.234354453600004], [10.3377932242, 7.937329156199999, 1.5133204226], [7.7886801772, 7.4419965787999995, 1.5317967744], [7.301906775799999, 9.9671208438, 1.5133204226], [9.851019822800001, 10.462453421200001, 1.5317967744], [10.3377932242, 9.9671208438, 8.475020422600002], [7.7886801772, 10.462453421200001, 8.4934967744], [7.301906775799999, 7.937329156199999, 
8.475020422600002], [9.851019822800001, 7.4419965787999995, 8.4934967744], [8.81985, 8.952224999999999, 3.551246710400001], [8.81985, 8.952224999999999, 6.462337259000002], [8.81985, 8.952224999999999, 10.5129467104], [8.81985, 8.952224999999999, 13.424037259000002], [1.4220067758000001, 1.9691791562, 5.448379577400002], [3.9711198228, 1.4738465788, 5.429903225600001], [4.4578932242, 3.9989708437999996, 5.448379577400002], [1.9087801772000001, 4.4943034212, 5.429903225600001], [1.4220067758000001, 3.9989708437999996, 12.4100795774], [3.9711198228, 4.4943034212, 12.391603225600003], [4.4578932242, 1.9691791562, 12.4100795774], [1.9087801772000001, 1.4738465788, 12.391603225600003], [2.93995, 2.984075, 3.4104532896], [2.93995, 2.984075, 0.49936274100000005], [2.93995, 2.984075, 10.372153289600003], [2.93995, 2.984075, 7.461062741000001], [8.81985, 8.952224999999999, 1.5279956862], [8.81985, 8.952224999999999, 8.489695686200001], [2.93995, 2.984075, 5.433704313800002], [2.93995, 2.984075, 12.395404313800002]], 'spacegroup_kinds': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139], 'spacegroup': {'number': 1, 'setting': 1}, 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, 
'16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': {'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, '92': {'C': 1.0}, '93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 
1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': {'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, '138': {'Fe': 1.0}, '139': {'Fe': 1.0}}, 'MOF_name': 'EWIKAX03', 'Dos at Fermi energy': 29.8688, 'Band_gap': 0.004466204, 'Dos at VBM': 29.8688, 'Dos at CBM': 29.8688, 'HSE band gap': 'not found', 'LCD': '4.36078', 'PLD': '1.82778', 'Density': '1.696', 'Accessible Surface Area': '0', 'Volume Fraction': '0.51294', 'Criteria#': 2, 'Multiplier_Sum': '1', 'Space_group#': '14', 'Space_group': 'P21/c', 'Temp': '94', 'Zprime': '1', 'Year': '2008', 'Metal': 'Fe', 'Metals number': '1', 'Cell volume': '1954.407', 'Metal density': '0.000511664', 'Crit: metal': 'yes', 'Crit: redox match': 'yes', 'Crit: redox active linker': 'no', 'Crit: pi-pi stacking': 'no', 'without ions': 'no', '2D': 'no', '1aromatico-up': 'yes', '2aromatici-up': 'no', 'N3--NCN up': 'no', 'benzene': 'no', '6m-rings': 'yes', 'pyridine': 'no', 'pyrimidine': 'no', 'metal-S': 'no', 'metal-O': 'no', 'metal-N': 'yes', 'metal-halogen': 'no', '5m-ring-leg2met': 'no', '5-m-rings': 'no', 'CN-M': 'yes', 'M-h2o': 'no', 'M-H2O-M': 'no', 'M-N-NM-N-M': 'no', 'M-C-C-TRIANG': 'no', 'COOM': 'no', 'units': {'Dos at Fermi energy': 'eln/cell', 'Dos at VBM': 'eln/cell', 'Dos at CBM': 'eln/cell', 'Density': 'g/cm3', 'Accessible Surface Area': 'm2/g'}, 'volume': 1954.406783203316, 'elements': {'1': 64, '6': 48, '7': 24, '26': 4}, 'username': 'ubuntu', 'uploaded': '2023-08-30T00:52:40.560182', 'modified': '2023-08-30T00:52:40.560190', 'hash_structure': 'f6bbc3390f53917f801298dc22827262', 'hash': '0cd93d1155d74a60380a615249629708', 'derived': {'arrays_keys': ['positions', 'numbers', 'spacegroup_kinds'], 'info_keys': ['unit_cell', 'n_atoms', 'occupancy', 'spacegroup', 'cell', 'formula', 'pbc', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 
'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n" + "{'_id': 'J4TzUYoBQvF7oZdWKd8C', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 6.8505767864, 3.7879027402000007], [8.8195795246, 6.854754491399999, 6.227811509400001], [4.7265811346, 11.3359757278, 0.33047189900000007], [5.0450835578, 0.2940984957000009, 1.8464934612000004], [4.2515440136, 10.621206211199999, 1.9067817832000002], [11.252870301399998, 1.1909443325, 0.5637027724000001], 
[0.5665401247999999, 0.5403204921, 1.8410076416000005], [10.8263776348, 1.4189276624999998, 2.290538534], [8.815592952400001, 11.0538732136, 3.7879027402000007], [8.8201204754, 11.0496955086, 6.227811509400001], [1.1533188654, 11.3359757278, 7.2921718989999995], [0.8348164422, 0.2940984957000009, 8.8081934612], [1.6283559864000001, 10.621206211199999, 8.8684817832], [6.386829698600001, 1.1909443325, 7.5254027724000006], [5.3133598752, 0.5403204921, 8.802707641600001], [6.8133223651999995, 1.4189276624999998, 9.252238534000004], [8.8241070476, 11.0538732136, 10.7496027402], [8.8195795246, 11.0496955086, 13.189511509400003], [4.7265811346, 6.568474272199999, 7.2921718989999995], [5.0450835578, 5.6740515043, 8.8081934612], [4.2515440136, 7.2832437888, 8.8684817832], [11.252870301399998, 4.7772056675, 7.5254027724000006], [0.5665401247999999, 5.427829507899999, 8.802707641600001], [10.8263776348, 4.549222337500001, 9.252238534000004], [8.815592952400001, 6.8505767864, 10.7496027402], [8.8201204754, 6.854754491399999, 13.189511509400003], [10.606481134600001, 0.6003242722, 6.631228101000001], [10.924983557800001, 11.642213440599999, 5.1152065388], [10.1314440136, 1.3150937887999998, 5.0549182168], [5.3729703014, 10.745355667500002, 6.397997227600001], [6.4464401248000005, 11.3959795079, 5.120692358400001], [4.9464776348, 10.5173723375, 4.671161466], [2.9356929524, 0.8824267863999998, 3.1737972598], [2.9402204754, 0.8866044913999999, 0.7338884906000002], [7.033218865399999, 5.3678257278, 6.631228101000001], [6.714716442199999, 6.2622365594, 5.1152065388], [7.508255986400001, 4.6530562112, 5.0549182168], [0.5069296986, 7.159094332499999, 6.397997227600001], [11.1932598752, 6.508470492099999, 5.120692358400001], [0.9334223652, 7.3870776624999985, 4.671161466], [2.9442070476, 5.0857232136, 3.1737972598], [2.9396795246000003, 5.0815455086, 0.7338884906000002], [10.606481134600001, 5.3678257278, 13.592928101000004], [10.924983557800001, 6.2622365594, 12.076906538800003], 
[10.1314440136, 4.6530562112, 12.016618216800003], [5.3729703014, 7.159094332499999, 13.3596972276], [6.4464401248000005, 6.508470492099999, 12.0823923584], [4.9464776348, 7.3870776624999985, 11.632861466], [2.9356929524, 5.0857232136, 10.135497259800003], [2.9402204754, 5.0815455086, 7.6955884906000005], [7.033218865399999, 0.6003242722, 13.592928101000004], [6.714716442199999, 11.642213440599999, 12.076906538800003], [7.508255986400001, 1.3150937887999998, 12.016618216800003], [0.5069296986, 10.745355667500002, 13.3596972276], [11.1932598752, 11.3959795079, 12.0823923584], [0.9334223652, 10.5173723375, 11.632861466], [2.9442070476, 0.8824267863999998, 10.135497259800003], [2.9396795246000003, 0.8866044913999999, 7.6955884906000005], [11.3421624628, 7.352343029499999, 1.4700882656000003], [0.8413313713999999, 6.690045487699999, 1.3900565624000003], [7.1397744132, 6.4757292212, 1.541181146], [6.3813143524, 5.266367177799999, 1.5607156762000003], [8.816380858999999, 7.8135974704, 4.3231043128], [8.8125824436, 7.814814973, 5.689045546400001], [6.297537537200001, 10.552106970499999, 1.4700882656000003], [5.0385686286, 11.2144045123, 1.3900565624000003], [10.499925586800002, 11.428720778799999, 1.541181146], [11.2583856476, 0.7017828222, 1.5607156762000003], [8.823319141, 10.0908525296, 4.3231043128], [8.827117556400001, 10.089635027, 5.689045546400001], [11.3421624628, 10.552106970499999, 8.4317882656], [0.8413313713999999, 11.2144045123, 8.3517565624], [7.1397744132, 11.428720778799999, 8.502881146], [6.3813143524, 0.7017828222, 8.522415676200001], [8.816380858999999, 10.0908525296, 11.2848043128], [8.8125824436, 10.089635027, 12.6507455464], [6.297537537200001, 7.352343029499999, 8.4317882656], [5.0385686286, 6.690045487699999, 8.3517565624], [10.499925586800002, 6.4757292212, 8.502881146], [11.2583856476, 5.266367177799999, 8.522415676200001], [8.823319141, 7.8135974704, 11.2848043128], [8.827117556400001, 7.814814973, 12.6507455464], [0.41763753719999996, 
1.3841930295, 5.491611734400001], [10.9184686286, 0.7218954876999999, 5.5716434376000015], [4.6200255868, 0.5075792212000001, 5.420518854], [5.378485647599999, 11.234517177799999, 5.400984323800001], [2.943419141, 1.8454474704, 2.6385956872000005], [2.9472175564, 1.8466649729999998, 1.2726544536000002], [5.4622624628, 4.583956970500001, 5.491611734400001], [6.7212313714, 5.246254512299999, 5.5716434376000015], [1.2598744132, 5.460570778799999, 5.420518854], [0.5014143524, 6.6699328222, 5.400984323800001], [2.936480859, 4.1227025296, 2.6385956872000005], [2.9326824436, 4.121485026999999, 1.2726544536000002], [0.41763753719999996, 4.583956970500001, 12.453311734400001], [10.9184686286, 5.246254512299999, 12.533343437600001], [4.6200255868, 5.460570778799999, 12.382218854000003], [5.378485647599999, 6.6699328222, 12.3626843238], [2.943419141, 4.1227025296, 9.600295687200003], [2.9472175564, 4.121485026999999, 8.234354453600004], [5.4622624628, 1.3841930295, 12.453311734400001], [6.7212313714, 0.7218954876999999, 12.533343437600001], [1.2598744132, 0.5075792212000001, 12.382218854000003], [0.5014143524, 11.234517177799999, 12.3626843238], [2.936480859, 1.8454474704, 9.600295687200003], [2.9326824436, 1.8466649729999998, 8.234354453600004], [10.3377932242, 7.937329156199999, 1.5133204226], [7.7886801772, 7.4419965787999995, 1.5317967744], [7.301906775799999, 9.9671208438, 1.5133204226], [9.851019822800001, 10.462453421200001, 1.5317967744], [10.3377932242, 9.9671208438, 8.475020422600002], [7.7886801772, 10.462453421200001, 8.4934967744], [7.301906775799999, 7.937329156199999, 8.475020422600002], [9.851019822800001, 7.4419965787999995, 8.4934967744], [8.81985, 8.952224999999999, 3.551246710400001], [8.81985, 8.952224999999999, 6.462337259000002], [8.81985, 8.952224999999999, 10.5129467104], [8.81985, 8.952224999999999, 13.424037259000002], [1.4220067758000001, 1.9691791562, 5.448379577400002], [3.9711198228, 1.4738465788, 5.429903225600001], [4.4578932242, 
3.9989708437999996, 5.448379577400002], [1.9087801772000001, 4.4943034212, 5.429903225600001], [1.4220067758000001, 3.9989708437999996, 12.4100795774], [3.9711198228, 4.4943034212, 12.391603225600003], [4.4578932242, 1.9691791562, 12.4100795774], [1.9087801772000001, 1.4738465788, 12.391603225600003], [2.93995, 2.984075, 3.4104532896], [2.93995, 2.984075, 0.49936274100000005], [2.93995, 2.984075, 10.372153289600003], [2.93995, 2.984075, 7.461062741000001], [8.81985, 8.952224999999999, 1.5279956862], [8.81985, 8.952224999999999, 8.489695686200001], [2.93995, 2.984075, 5.433704313800002], [2.93995, 2.984075, 12.395404313800002]], 'spacegroup_kinds': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139], 'spacegroup': {'number': 1, 'setting': 1}, 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': 
{'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': {'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, '92': {'C': 1.0}, '93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': {'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, '138': {'Fe': 1.0}, '139': {'Fe': 1.0}}, 'MOF_name': 'EWIKAX03', 'Dos at Fermi energy': 29.8688, 'Band_gap': 0.004466204, 'Dos at VBM': 29.8688, 
'Dos at CBM': 29.8688, 'HSE band gap': 'not found', 'LCD': '4.36078', 'PLD': '1.82778', 'Density': '1.696', 'Accessible Surface Area': '0', 'Volume Fraction': '0.51294', 'Criteria#': 2, 'Multiplier_Sum': '1', 'Space_group#': '14', 'Space_group': 'P21/c', 'Temp': '94', 'Zprime': '1', 'Year': '2008', 'Metal': 'Fe', 'Metals number': '1', 'Cell volume': '1954.407', 'Metal density': '0.000511664', 'Crit: metal': 'yes', 'Crit: redox match': 'yes', 'Crit: redox active linker': 'no', 'Crit: pi-pi stacking': 'no', 'without ions': 'no', '2D': 'no', '1aromatico-up': 'yes', '2aromatici-up': 'no', 'N3--NCN up': 'no', 'benzene': 'no', '6m-rings': 'yes', 'pyridine': 'no', 'pyrimidine': 'no', 'metal-S': 'no', 'metal-O': 'no', 'metal-N': 'yes', 'metal-halogen': 'no', '5m-ring-leg2met': 'no', '5-m-rings': 'no', 'CN-M': 'yes', 'M-h2o': 'no', 'M-H2O-M': 'no', 'M-N-NM-N-M': 'no', 'M-C-C-TRIANG': 'no', 'COOM': 'no', 'units': {'Dos at Fermi energy': 'eln/cell', 'Dos at VBM': 'eln/cell', 'Dos at CBM': 'eln/cell', 'Density': 'g/cm3', 'Accessible Surface Area': 'm2/g'}, 'volume': 1954.406783203316, 'elements': {'1': 64, '6': 48, '7': 24, '26': 4}, 'username': 'ubuntu', 'uploaded': '2023-09-01T18:13:24.859685', 'modified': '2023-09-01T18:13:24.859694', 'hash_structure': '660d3f56483cf3a6dd94d833d4478fcf', 'hash': '2c1caa8af0562303fc8cfe0eba64b444', 'derived': {'arrays_keys': ['numbers', 'spacegroup_kinds', 'positions'], 'info_keys': ['pbc', 'formula', 'occupancy', 'cell', 'unit_cell', 'spacegroup', 'n_atoms', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 
'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n" ] } ], @@ -424,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -442,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -460,7 +453,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -478,7 +471,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -496,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -504,7 +497,7 @@ "output_type": "stream", "text": [ "1\n", - "{'_id': '30DtQ4oBtksDlC5rnd72', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 
6.8505767864, 3.7879027402000007], [8.8195795246, 6.854754491399999, 6.227811509400001], [4.7265811346, 11.3359757278, 0.33047189900000007], [5.0450835578, 0.2940984957000009, 1.8464934612000004], [4.2515440136, 10.621206211199999, 1.9067817832000002], [11.252870301399998, 1.1909443325, 0.5637027724000001], [0.5665401247999999, 0.5403204921, 1.8410076416000005], [10.8263776348, 1.4189276624999998, 2.290538534], [8.815592952400001, 11.0538732136, 3.7879027402000007], [8.8201204754, 11.0496955086, 6.227811509400001], [1.1533188654, 11.3359757278, 7.2921718989999995], [0.8348164422, 0.2940984957000009, 8.8081934612], [1.6283559864000001, 10.621206211199999, 8.8684817832], [6.386829698600001, 1.1909443325, 7.5254027724000006], [5.3133598752, 0.5403204921, 8.802707641600001], [6.8133223651999995, 1.4189276624999998, 9.252238534000004], [8.8241070476, 11.0538732136, 10.7496027402], [8.8195795246, 11.0496955086, 13.189511509400003], [4.7265811346, 6.568474272199999, 7.2921718989999995], [5.0450835578, 5.6740515043, 8.8081934612], [4.2515440136, 7.2832437888, 8.8684817832], [11.252870301399998, 4.7772056675, 7.5254027724000006], [0.5665401247999999, 5.427829507899999, 8.802707641600001], [10.8263776348, 4.549222337500001, 9.252238534000004], [8.815592952400001, 6.8505767864, 10.7496027402], [8.8201204754, 6.854754491399999, 13.189511509400003], [10.606481134600001, 0.6003242722, 6.631228101000001], [10.924983557800001, 11.642213440599999, 5.1152065388], [10.1314440136, 1.3150937887999998, 5.0549182168], [5.3729703014, 10.745355667500002, 6.397997227600001], [6.4464401248000005, 11.3959795079, 5.120692358400001], [4.9464776348, 10.5173723375, 4.671161466], [2.9356929524, 0.8824267863999998, 3.1737972598], [2.9402204754, 0.8866044913999999, 0.7338884906000002], [7.033218865399999, 5.3678257278, 6.631228101000001], [6.714716442199999, 6.2622365594, 5.1152065388], [7.508255986400001, 4.6530562112, 5.0549182168], [0.5069296986, 7.159094332499999, 6.397997227600001], 
[11.1932598752, 6.508470492099999, 5.120692358400001], [0.9334223652, 7.3870776624999985, 4.671161466], [2.9442070476, 5.0857232136, 3.1737972598], [2.9396795246000003, 5.0815455086, 0.7338884906000002], [10.606481134600001, 5.3678257278, 13.592928101000004], [10.924983557800001, 6.2622365594, 12.076906538800003], [10.1314440136, 4.6530562112, 12.016618216800003], [5.3729703014, 7.159094332499999, 13.3596972276], [6.4464401248000005, 6.508470492099999, 12.0823923584], [4.9464776348, 7.3870776624999985, 11.632861466], [2.9356929524, 5.0857232136, 10.135497259800003], [2.9402204754, 5.0815455086, 7.6955884906000005], [7.033218865399999, 0.6003242722, 13.592928101000004], [6.714716442199999, 11.642213440599999, 12.076906538800003], [7.508255986400001, 1.3150937887999998, 12.016618216800003], [0.5069296986, 10.745355667500002, 13.3596972276], [11.1932598752, 11.3959795079, 12.0823923584], [0.9334223652, 10.5173723375, 11.632861466], [2.9442070476, 0.8824267863999998, 10.135497259800003], [2.9396795246000003, 0.8866044913999999, 7.6955884906000005], [11.3421624628, 7.352343029499999, 1.4700882656000003], [0.8413313713999999, 6.690045487699999, 1.3900565624000003], [7.1397744132, 6.4757292212, 1.541181146], [6.3813143524, 5.266367177799999, 1.5607156762000003], [8.816380858999999, 7.8135974704, 4.3231043128], [8.8125824436, 7.814814973, 5.689045546400001], [6.297537537200001, 10.552106970499999, 1.4700882656000003], [5.0385686286, 11.2144045123, 1.3900565624000003], [10.499925586800002, 11.428720778799999, 1.541181146], [11.2583856476, 0.7017828222, 1.5607156762000003], [8.823319141, 10.0908525296, 4.3231043128], [8.827117556400001, 10.089635027, 5.689045546400001], [11.3421624628, 10.552106970499999, 8.4317882656], [0.8413313713999999, 11.2144045123, 8.3517565624], [7.1397744132, 11.428720778799999, 8.502881146], [6.3813143524, 0.7017828222, 8.522415676200001], [8.816380858999999, 10.0908525296, 11.2848043128], [8.8125824436, 10.089635027, 12.6507455464], 
[6.297537537200001, 7.352343029499999, 8.4317882656], [5.0385686286, 6.690045487699999, 8.3517565624], [10.499925586800002, 6.4757292212, 8.502881146], [11.2583856476, 5.266367177799999, 8.522415676200001], [8.823319141, 7.8135974704, 11.2848043128], [8.827117556400001, 7.814814973, 12.6507455464], [0.41763753719999996, 1.3841930295, 5.491611734400001], [10.9184686286, 0.7218954876999999, 5.5716434376000015], [4.6200255868, 0.5075792212000001, 5.420518854], [5.378485647599999, 11.234517177799999, 5.400984323800001], [2.943419141, 1.8454474704, 2.6385956872000005], [2.9472175564, 1.8466649729999998, 1.2726544536000002], [5.4622624628, 4.583956970500001, 5.491611734400001], [6.7212313714, 5.246254512299999, 5.5716434376000015], [1.2598744132, 5.460570778799999, 5.420518854], [0.5014143524, 6.6699328222, 5.400984323800001], [2.936480859, 4.1227025296, 2.6385956872000005], [2.9326824436, 4.121485026999999, 1.2726544536000002], [0.41763753719999996, 4.583956970500001, 12.453311734400001], [10.9184686286, 5.246254512299999, 12.533343437600001], [4.6200255868, 5.460570778799999, 12.382218854000003], [5.378485647599999, 6.6699328222, 12.3626843238], [2.943419141, 4.1227025296, 9.600295687200003], [2.9472175564, 4.121485026999999, 8.234354453600004], [5.4622624628, 1.3841930295, 12.453311734400001], [6.7212313714, 0.7218954876999999, 12.533343437600001], [1.2598744132, 0.5075792212000001, 12.382218854000003], [0.5014143524, 11.234517177799999, 12.3626843238], [2.936480859, 1.8454474704, 9.600295687200003], [2.9326824436, 1.8466649729999998, 8.234354453600004], [10.3377932242, 7.937329156199999, 1.5133204226], [7.7886801772, 7.4419965787999995, 1.5317967744], [7.301906775799999, 9.9671208438, 1.5133204226], [9.851019822800001, 10.462453421200001, 1.5317967744], [10.3377932242, 9.9671208438, 8.475020422600002], [7.7886801772, 10.462453421200001, 8.4934967744], [7.301906775799999, 7.937329156199999, 8.475020422600002], [9.851019822800001, 7.4419965787999995, 8.4934967744], 
[8.81985, 8.952224999999999, 3.551246710400001], [8.81985, 8.952224999999999, 6.462337259000002], [8.81985, 8.952224999999999, 10.5129467104], [8.81985, 8.952224999999999, 13.424037259000002], [1.4220067758000001, 1.9691791562, 5.448379577400002], [3.9711198228, 1.4738465788, 5.429903225600001], [4.4578932242, 3.9989708437999996, 5.448379577400002], [1.9087801772000001, 4.4943034212, 5.429903225600001], [1.4220067758000001, 3.9989708437999996, 12.4100795774], [3.9711198228, 4.4943034212, 12.391603225600003], [4.4578932242, 1.9691791562, 12.4100795774], [1.9087801772000001, 1.4738465788, 12.391603225600003], [2.93995, 2.984075, 3.4104532896], [2.93995, 2.984075, 0.49936274100000005], [2.93995, 2.984075, 10.372153289600003], [2.93995, 2.984075, 7.461062741000001], [8.81985, 8.952224999999999, 1.5279956862], [8.81985, 8.952224999999999, 8.489695686200001], [2.93995, 2.984075, 5.433704313800002], [2.93995, 2.984075, 12.395404313800002]], 'spacegroup_kinds': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139], 'spacegroup': {'number': 1, 'setting': 1}, 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, 
'20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': {'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, '92': {'C': 1.0}, '93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': 
{'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, '138': {'Fe': 1.0}, '139': {'Fe': 1.0}}, 'MOF_name': 'EWIKAX03', 'Dos at Fermi energy': 29.8688, 'Band_gap': 0.004466204, 'Dos at VBM': 29.8688, 'Dos at CBM': 29.8688, 'HSE band gap': 'not found', 'LCD': '4.36078', 'PLD': '1.82778', 'Density': '1.696', 'Accessible Surface Area': '0', 'Volume Fraction': '0.51294', 'Criteria#': 2, 'Multiplier_Sum': '1', 'Space_group#': '14', 'Space_group': 'P21/c', 'Temp': '94', 'Zprime': '1', 'Year': '2008', 'Metal': 'Fe', 'Metals number': '1', 'Cell volume': '1954.407', 'Metal density': '0.000511664', 'Crit: metal': 'yes', 'Crit: redox match': 'yes', 'Crit: redox active linker': 'no', 'Crit: pi-pi stacking': 'no', 'without ions': 'no', '2D': 'no', '1aromatico-up': 'yes', '2aromatici-up': 'no', 'N3--NCN up': 'no', 'benzene': 'no', '6m-rings': 'yes', 'pyridine': 'no', 'pyrimidine': 'no', 'metal-S': 'no', 'metal-O': 'no', 'metal-N': 'yes', 'metal-halogen': 'no', '5m-ring-leg2met': 'no', '5-m-rings': 'no', 'CN-M': 'yes', 'M-h2o': 'no', 'M-H2O-M': 'no', 'M-N-NM-N-M': 'no', 'M-C-C-TRIANG': 'no', 'COOM': 'no', 'units': {'Dos at Fermi energy': 'eln/cell', 'Dos at VBM': 'eln/cell', 'Dos at CBM': 'eln/cell', 'Density': 'g/cm3', 'Accessible Surface Area': 'm2/g'}, 'volume': 1954.406783203316, 'elements': {'1': 64, '6': 48, '7': 24, '26': 4}, 'username': 'ubuntu', 'uploaded': '2023-08-30T00:52:40.560182', 'modified': '2023-08-30T00:52:40.560190', 'hash_structure': 'f6bbc3390f53917f801298dc22827262', 'hash': '0cd93d1155d74a60380a615249629708', 'derived': {'arrays_keys': ['positions', 'numbers', 'spacegroup_kinds'], 'info_keys': ['unit_cell', 'n_atoms', 'occupancy', 'spacegroup', 'cell', 'formula', 'pbc', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 
'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n", + "{'_id': 'J4TzUYoBQvF7oZdWKd8C', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 6.8505767864, 3.7879027402000007], [8.8195795246, 6.854754491399999, 6.227811509400001], [4.7265811346, 11.3359757278, 0.33047189900000007], [5.0450835578, 0.2940984957000009, 1.8464934612000004], [4.2515440136, 10.621206211199999, 1.9067817832000002], [11.252870301399998, 1.1909443325, 0.5637027724000001], [0.5665401247999999, 0.5403204921, 1.8410076416000005], [10.8263776348, 
1.4189276624999998, 2.290538534], [8.815592952400001, 11.0538732136, 3.7879027402000007], [8.8201204754, 11.0496955086, 6.227811509400001], [1.1533188654, 11.3359757278, 7.2921718989999995], [0.8348164422, 0.2940984957000009, 8.8081934612], [1.6283559864000001, 10.621206211199999, 8.8684817832], [6.386829698600001, 1.1909443325, 7.5254027724000006], [5.3133598752, 0.5403204921, 8.802707641600001], [6.8133223651999995, 1.4189276624999998, 9.252238534000004], [8.8241070476, 11.0538732136, 10.7496027402], [8.8195795246, 11.0496955086, 13.189511509400003], [4.7265811346, 6.568474272199999, 7.2921718989999995], [5.0450835578, 5.6740515043, 8.8081934612], [4.2515440136, 7.2832437888, 8.8684817832], [11.252870301399998, 4.7772056675, 7.5254027724000006], [0.5665401247999999, 5.427829507899999, 8.802707641600001], [10.8263776348, 4.549222337500001, 9.252238534000004], [8.815592952400001, 6.8505767864, 10.7496027402], [8.8201204754, 6.854754491399999, 13.189511509400003], [10.606481134600001, 0.6003242722, 6.631228101000001], [10.924983557800001, 11.642213440599999, 5.1152065388], [10.1314440136, 1.3150937887999998, 5.0549182168], [5.3729703014, 10.745355667500002, 6.397997227600001], [6.4464401248000005, 11.3959795079, 5.120692358400001], [4.9464776348, 10.5173723375, 4.671161466], [2.9356929524, 0.8824267863999998, 3.1737972598], [2.9402204754, 0.8866044913999999, 0.7338884906000002], [7.033218865399999, 5.3678257278, 6.631228101000001], [6.714716442199999, 6.2622365594, 5.1152065388], [7.508255986400001, 4.6530562112, 5.0549182168], [0.5069296986, 7.159094332499999, 6.397997227600001], [11.1932598752, 6.508470492099999, 5.120692358400001], [0.9334223652, 7.3870776624999985, 4.671161466], [2.9442070476, 5.0857232136, 3.1737972598], [2.9396795246000003, 5.0815455086, 0.7338884906000002], [10.606481134600001, 5.3678257278, 13.592928101000004], [10.924983557800001, 6.2622365594, 12.076906538800003], [10.1314440136, 4.6530562112, 12.016618216800003], [5.3729703014, 
7.159094332499999, 13.3596972276], [6.4464401248000005, 6.508470492099999, 12.0823923584], [4.9464776348, 7.3870776624999985, 11.632861466], [2.9356929524, 5.0857232136, 10.135497259800003], [2.9402204754, 5.0815455086, 7.6955884906000005], [7.033218865399999, 0.6003242722, 13.592928101000004], [6.714716442199999, 11.642213440599999, 12.076906538800003], [7.508255986400001, 1.3150937887999998, 12.016618216800003], [0.5069296986, 10.745355667500002, 13.3596972276], [11.1932598752, 11.3959795079, 12.0823923584], [0.9334223652, 10.5173723375, 11.632861466], [2.9442070476, 0.8824267863999998, 10.135497259800003], [2.9396795246000003, 0.8866044913999999, 7.6955884906000005], [11.3421624628, 7.352343029499999, 1.4700882656000003], [0.8413313713999999, 6.690045487699999, 1.3900565624000003], [7.1397744132, 6.4757292212, 1.541181146], [6.3813143524, 5.266367177799999, 1.5607156762000003], [8.816380858999999, 7.8135974704, 4.3231043128], [8.8125824436, 7.814814973, 5.689045546400001], [6.297537537200001, 10.552106970499999, 1.4700882656000003], [5.0385686286, 11.2144045123, 1.3900565624000003], [10.499925586800002, 11.428720778799999, 1.541181146], [11.2583856476, 0.7017828222, 1.5607156762000003], [8.823319141, 10.0908525296, 4.3231043128], [8.827117556400001, 10.089635027, 5.689045546400001], [11.3421624628, 10.552106970499999, 8.4317882656], [0.8413313713999999, 11.2144045123, 8.3517565624], [7.1397744132, 11.428720778799999, 8.502881146], [6.3813143524, 0.7017828222, 8.522415676200001], [8.816380858999999, 10.0908525296, 11.2848043128], [8.8125824436, 10.089635027, 12.6507455464], [6.297537537200001, 7.352343029499999, 8.4317882656], [5.0385686286, 6.690045487699999, 8.3517565624], [10.499925586800002, 6.4757292212, 8.502881146], [11.2583856476, 5.266367177799999, 8.522415676200001], [8.823319141, 7.8135974704, 11.2848043128], [8.827117556400001, 7.814814973, 12.6507455464], [0.41763753719999996, 1.3841930295, 5.491611734400001], [10.9184686286, 0.7218954876999999, 
5.5716434376000015], [4.6200255868, 0.5075792212000001, 5.420518854], [5.378485647599999, 11.234517177799999, 5.400984323800001], [2.943419141, 1.8454474704, 2.6385956872000005], [2.9472175564, 1.8466649729999998, 1.2726544536000002], [5.4622624628, 4.583956970500001, 5.491611734400001], [6.7212313714, 5.246254512299999, 5.5716434376000015], [1.2598744132, 5.460570778799999, 5.420518854], [0.5014143524, 6.6699328222, 5.400984323800001], [2.936480859, 4.1227025296, 2.6385956872000005], [2.9326824436, 4.121485026999999, 1.2726544536000002], [0.41763753719999996, 4.583956970500001, 12.453311734400001], [10.9184686286, 5.246254512299999, 12.533343437600001], [4.6200255868, 5.460570778799999, 12.382218854000003], [5.378485647599999, 6.6699328222, 12.3626843238], [2.943419141, 4.1227025296, 9.600295687200003], [2.9472175564, 4.121485026999999, 8.234354453600004], [5.4622624628, 1.3841930295, 12.453311734400001], [6.7212313714, 0.7218954876999999, 12.533343437600001], [1.2598744132, 0.5075792212000001, 12.382218854000003], [0.5014143524, 11.234517177799999, 12.3626843238], [2.936480859, 1.8454474704, 9.600295687200003], [2.9326824436, 1.8466649729999998, 8.234354453600004], [10.3377932242, 7.937329156199999, 1.5133204226], [7.7886801772, 7.4419965787999995, 1.5317967744], [7.301906775799999, 9.9671208438, 1.5133204226], [9.851019822800001, 10.462453421200001, 1.5317967744], [10.3377932242, 9.9671208438, 8.475020422600002], [7.7886801772, 10.462453421200001, 8.4934967744], [7.301906775799999, 7.937329156199999, 8.475020422600002], [9.851019822800001, 7.4419965787999995, 8.4934967744], [8.81985, 8.952224999999999, 3.551246710400001], [8.81985, 8.952224999999999, 6.462337259000002], [8.81985, 8.952224999999999, 10.5129467104], [8.81985, 8.952224999999999, 13.424037259000002], [1.4220067758000001, 1.9691791562, 5.448379577400002], [3.9711198228, 1.4738465788, 5.429903225600001], [4.4578932242, 3.9989708437999996, 5.448379577400002], [1.9087801772000001, 4.4943034212, 
5.429903225600001], [1.4220067758000001, 3.9989708437999996, 12.4100795774], [3.9711198228, 4.4943034212, 12.391603225600003], [4.4578932242, 1.9691791562, 12.4100795774], [1.9087801772000001, 1.4738465788, 12.391603225600003], [2.93995, 2.984075, 3.4104532896], [2.93995, 2.984075, 0.49936274100000005], [2.93995, 2.984075, 10.372153289600003], [2.93995, 2.984075, 7.461062741000001], [8.81985, 8.952224999999999, 1.5279956862], [8.81985, 8.952224999999999, 8.489695686200001], [2.93995, 2.984075, 5.433704313800002], [2.93995, 2.984075, 12.395404313800002]], 'spacegroup_kinds': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139], 'spacegroup': {'number': 1, 'setting': 1}, 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 
1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': {'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, '92': {'C': 1.0}, '93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': {'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, '138': {'Fe': 1.0}, '139': {'Fe': 1.0}}, 'MOF_name': 'EWIKAX03', 'Dos at Fermi energy': 29.8688, 'Band_gap': 0.004466204, 'Dos at VBM': 29.8688, 'Dos at CBM': 29.8688, 'HSE band gap': 'not found', 'LCD': '4.36078', 'PLD': 
'1.82778', 'Density': '1.696', 'Accessible Surface Area': '0', 'Volume Fraction': '0.51294', 'Criteria#': 2, 'Multiplier_Sum': '1', 'Space_group#': '14', 'Space_group': 'P21/c', 'Temp': '94', 'Zprime': '1', 'Year': '2008', 'Metal': 'Fe', 'Metals number': '1', 'Cell volume': '1954.407', 'Metal density': '0.000511664', 'Crit: metal': 'yes', 'Crit: redox match': 'yes', 'Crit: redox active linker': 'no', 'Crit: pi-pi stacking': 'no', 'without ions': 'no', '2D': 'no', '1aromatico-up': 'yes', '2aromatici-up': 'no', 'N3--NCN up': 'no', 'benzene': 'no', '6m-rings': 'yes', 'pyridine': 'no', 'pyrimidine': 'no', 'metal-S': 'no', 'metal-O': 'no', 'metal-N': 'yes', 'metal-halogen': 'no', '5m-ring-leg2met': 'no', '5-m-rings': 'no', 'CN-M': 'yes', 'M-h2o': 'no', 'M-H2O-M': 'no', 'M-N-NM-N-M': 'no', 'M-C-C-TRIANG': 'no', 'COOM': 'no', 'units': {'Dos at Fermi energy': 'eln/cell', 'Dos at VBM': 'eln/cell', 'Dos at CBM': 'eln/cell', 'Density': 'g/cm3', 'Accessible Surface Area': 'm2/g'}, 'volume': 1954.406783203316, 'elements': {'1': 64, '6': 48, '7': 24, '26': 4}, 'username': 'ubuntu', 'uploaded': '2023-09-01T18:13:24.859685', 'modified': '2023-09-01T18:13:24.859694', 'hash_structure': '660d3f56483cf3a6dd94d833d4478fcf', 'hash': '2c1caa8af0562303fc8cfe0eba64b444', 'derived': {'arrays_keys': ['numbers', 'spacegroup_kinds', 'positions'], 'info_keys': ['pbc', 'formula', 'occupancy', 'cell', 'unit_cell', 'spacegroup', 'n_atoms', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 'metal-N', 'metal-halogen', 
'5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n", "dict_keys(['_id', 'n_atoms', 'cell', 'pbc', 'formula', 'numbers', 'positions', 'spacegroup_kinds', 'spacegroup', 'unit_cell', 'occupancy', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume', 'elements', 'username', 'uploaded', 'modified', 'hash_structure', 'hash', 'derived'])\n" ] } @@ -518,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -531,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -540,18 +533,19 @@ "['example_value']" ] }, - "execution_count": 33, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "abcd.refresh()\n", "abcd.property(\"example_property\", query)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -564,7 +558,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -573,18 +567,19 @@ "['example_value']" ] }, - "execution_count": 35, + "execution_count": 
30, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "abcd.refresh()\n", "abcd.property(\"renamed_property\", query)" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -596,21 +591,22 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['example_value']" + "[]" ] }, - "execution_count": 47, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "abcd.refresh()\n", "# abcd.property(\"example_property\", query)\n", "abcd.property(\"renamed_property\", query)" ] From c86043c11fe02bce5152fafe4c6f2a3c3db0939f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 4 Sep 2023 10:13:05 +0000 Subject: [PATCH 042/112] Apply black formatting --- abcd/backends/atoms_opensearch.py | 3 --- abcd/server/app/db.py | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index f7f7bd00..b347f40e 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -635,7 +635,6 @@ def properties(self, query: Union[dict, str, None] = None) -> dict: for prop in self.client.indices.get_mapping(index=self.index_name)[ self.index_name ]["mappings"]["properties"].keys(): - body = { "size": 0, "query": query, @@ -738,7 +737,6 @@ def count_properties(self, query: Union[dict, str, None] = None) -> dict: return properties for key in keys: - body = { "size": 0, "query": query, @@ -955,7 +953,6 @@ def histogram(name, data, **kwargs): return None elif data and isinstance(data, list): - ptype = type(data[0]) if not all(isinstance(x, ptype) for x in data): diff --git a/abcd/server/app/db.py b/abcd/server/app/db.py index eadc1f45..7430c183 100644 --- a/abcd/server/app/db.py +++ b/abcd/server/app/db.py @@ -4,7 +4,9 @@ class Database(ABCD): - """Wrapper for the ABCD factory method for 
registering a the database for the Flask application.""" + """ + Wrapper for the ABCD factory method for registering a the database for the Flask application. + """ def __init__(self): super().__init__() From 8cd3ef1df25f6493bad1bef531f988b327b552e3 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 6 Sep 2023 10:38:13 +0000 Subject: [PATCH 043/112] Improve property file reader Throw errors for incorrect encoding, rather than replacing characters silently, and allow Excel spreadsheets, in addition to csv files. --- abcd/backends/atoms_properties.py | 50 +++++++++++++++++----- pyproject.toml | 2 + tests/properties.py | 22 +++++----- tutorials/abcd_opensearch_properties.ipynb | 6 +-- 4 files changed, 55 insertions(+), 25 deletions(-) diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py index e4f72d58..5503c398 100644 --- a/abcd/backends/atoms_properties.py +++ b/abcd/backends/atoms_properties.py @@ -3,6 +3,7 @@ import numpy as np from typing import Union from pathlib import Path +import chardet class Properties: @@ -12,38 +13,44 @@ class Properties: Attributes ---------- - csv_file: Union[str, Path] - Name or path to csv file containing properties. + data_file: Union[str, Path] + Name or path to data file containing properties. Treated as a csv file + by default, but Excel spreadsheets may also be read. store_struct_file: bool Whether to construct a filename for each structure. struct_file_template: str Template string for path to files containing structure. struct_name_label: str - Field name in csv file containing values for `struct_name`. + Field name in data file containing values for `struct_name`. df: pd.Dataframe - Dataframe containing loaded property data from csv file. + Dataframe containing loaded property data from data file. units: Union[dict, None], optional Units. struct_files: list[str] List containing a filename for each structure in the dataframe. + encoding: str, optional + Encoding of csv file to be read. 
Default is `utf-8`. """ def __init__( self, - csv_file: Union[str, Path], + data_file: Union[str, Path], store_struct_file: bool = False, struct_file_template: Union[str, None] = None, struct_name_label: Union[str, None] = None, units: Union[dict, None] = None, infer_units: bool = False, + encoding: str = "utf-8", ): """ Initialises class. Parameters ---------- - csv_file: Union[str, Path] - Path or filename of csv file containing properties to be loaded. + data_file: Union[str, Path] + Path or filename of data file containing properties to be loaded. + Assumed to be a csv file by default, but Excel spreadsheets may + also be read. store_struct_file: bool, optional If true, use struct_file_template and struct_name_label to construct filename for each structure. Default is `False`. @@ -53,17 +60,38 @@ def __init__( Template must contain `{struct_name}`, to ensure a unique file for each structure. Default is `None`. struct_name_label: Union[str, None], optional - Field name in csv file containing values for `struct_name`. + Field name in data file containing values for `struct_name`. Required only if store_struct_file is True. Default is `None`. units: Union[dict, None], optional - Units for fields in csv file. If unspecified, _separate_units() + Units for fields in data file. If unspecified, _separate_units() is used to identify units in field names. Default is `None`. infer_units: bool, optional Whether to attempt to infer units from field names in the dataframe. Unused if units is not `None`. Default is `False`. + encoding: str, optional + Encoding of file to be read. Default is `utf-8`. + For pandas==1.2, setting this to `None` means `errors='replace'` + is passed to `open()`, which replaces invalid characters with + the replacement character. Otherwise, `errors='strict'` is passed + to `open()`, which means UnicodeDecodeError are thrown if the + encoding is wrong. + For pandas==1.3, `encoding` no longer defines how errors are + handled. 
`encoding_errors` instead defaults to `strict`, which has + the same effect as non-None values of `encoding` for pandas==1.2. """ - self.csv_file = csv_file - self.df = pd.read_csv(self.csv_file) + self.data_file = data_file + self.encoding = encoding + try: + self.df = pd.read_csv(self.data_file, encoding=self.encoding) + except UnicodeDecodeError: + detected = chardet.detect(Path(self.data_file).read_bytes()) + raise ValueError( + f"File cannot be decoded using encoding: {self.encoding}." + f" Detected encoding: {detected}." + ) + except pd.errors.ParserError: + self.df = pd.read_excel(self.data_file, header=0) + self.df.replace({np.nan: None}, inplace=True) if units is not None: diff --git a/pyproject.toml b/pyproject.toml index 15d2d7d8..614312f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,11 +11,13 @@ documentation = "https://libatoms.github.io/abcd/" [tool.poetry.dependencies] ase = "^3.23" +chardet = "^5.2.0" lark = "^1.1.9" luqum = "^0.13.0" matplotlib = "^3.9" notebook = "^7.2" numpy = "^1.26" +openpyxl = "^3.1.2" opensearch-py = "^2.2.0" pandas = "^2.2" pymongo = "^4.7.3" diff --git a/tests/properties.py b/tests/properties.py index eebbdc6a..1afab1b7 100644 --- a/tests/properties.py +++ b/tests/properties.py @@ -3,18 +3,18 @@ class PropertiesTests(unittest.TestCase): - """Testing properties csv reader""" + """Testing properties data reader""" @classmethod def setUpClass(cls): """ - Load example csv file. + Load example data file. 
""" import os class_path = os.path.normpath(os.path.abspath(__file__)) - csv_file = os.path.dirname(class_path) + "/examples.csv" - cls.property = Properties(csv_file) + data_file = os.path.dirname(class_path) + "/examples.csv" + cls.property = Properties(data_file) def test_dataframe(self): """ @@ -31,7 +31,7 @@ def test_specify_units(self): """ input_units_1 = {"Integers": "items", "Floating": "seconds"} properties_1 = Properties( - csv_file=self.property.csv_file, + data_file=self.property.data_file, units=input_units_1, ) self.assertEqual(properties_1.units, input_units_1) @@ -39,7 +39,7 @@ def test_specify_units(self): input_units_2 = {"Fake": "m"} with self.assertRaises(ValueError): properties_1 = Properties( - csv_file=self.property.csv_file, + data_file=self.property.data_file, units=input_units_2, ) @@ -48,7 +48,7 @@ def test_infer_units(self): Test units can be inferred from field names. """ properties = Properties( - csv_file=self.property.csv_file, + data_file=self.property.data_file, infer_units=True, ) expected_units = {"Comma units": "m", "Bracket units": "s"} @@ -71,7 +71,7 @@ def test_struct_file(self): struct_file_template = "test_{struct_name}_file.txt" struct_name_label = "Text" properties_1 = Properties( - csv_file=self.property.csv_file, + data_file=self.property.data_file, store_struct_file=True, struct_file_template=struct_file_template, struct_name_label=struct_name_label, @@ -88,7 +88,7 @@ def test_struct_file(self): invalid_template = "invalid_template" with self.assertRaises(ValueError): Properties( - csv_file=self.property.csv_file, + data_file=self.property.data_file, store_struct_file=True, struct_file_template=invalid_template, struct_name_label=struct_name_label, @@ -97,7 +97,7 @@ def test_struct_file(self): invalid_label = "label" with self.assertRaises(ValueError): Properties( - csv_file=self.property.csv_file, + data_file=self.property.data_file, store_struct_file=True, struct_file_template=struct_file_template, 
struct_name_label=invalid_label, @@ -140,7 +140,7 @@ def test_to_list_units(self): Test units are included in properties when converting to a list. """ properties_1 = Properties( - csv_file=self.property.csv_file, + data_file=self.property.data_file, infer_units=True, ) expected_units = {"Comma units": "m", "Bracket units": "s"} diff --git a/tutorials/abcd_opensearch_properties.ipynb b/tutorials/abcd_opensearch_properties.ipynb index 3642e137..1862549d 100644 --- a/tutorials/abcd_opensearch_properties.ipynb +++ b/tutorials/abcd_opensearch_properties.ipynb @@ -228,7 +228,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Extra information can be added manually via a dictionary, or read in through a csv file. A template for the structures corresponding to each row in the csv file, and units in the form of `field (unit)` or `field / unit`, can also be inferred." + "Extra information can be added manually via a dictionary, or read in through a csv/Excel file. A template for the structures corresponding to each row in the data file, and units in the form of `field (unit)` or `field / unit`, can also be inferred." 
] }, { @@ -244,7 +244,7 @@ "outputs": [], "source": [ "directory = Path('/home/ubuntu/data/')\n", - "csv_file = directory / 'DATA_copy.csv'\n", + "data_file = directory / 'DATA_copy.csv'\n", "struct_file_template = str(directory) + \"/{struct_name}_FSR-out.cif\"" ] }, @@ -260,7 +260,7 @@ "outputs": [], "source": [ "properties = Properties(\n", - " csv_file=csv_file,\n", + " data_file=data_file,\n", " store_struct_file=True,\n", " struct_file_template=struct_file_template,\n", " struct_name_label = \"MOF_name\",\n", From cc6edb4d2f998b23a307c632d152c2d039ecbcc7 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 6 Sep 2023 16:30:35 +0000 Subject: [PATCH 044/112] Add benchmarking examples --- tutorials/abcd_benchmarking.ipynb | 825 ++++++++++++++++++++++++++++++ 1 file changed, 825 insertions(+) create mode 100644 tutorials/abcd_benchmarking.ipynb diff --git a/tutorials/abcd_benchmarking.ipynb b/tutorials/abcd_benchmarking.ipynb new file mode 100644 index 00000000..fa8e9ece --- /dev/null +++ b/tutorials/abcd_benchmarking.ipynb @@ -0,0 +1,825 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from abcd import ABCD" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenSearchDatabase(url=localhost:9200, index=atoms) \n" + ] + } + ], + "source": [ + "os_url = 'opensearch://admin:admin@localhost:9200'\n", + "os_abcd = ABCD.from_url(os_url)\n", + "\n", + "print(os_abcd)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MongoDatabase(url=localhost:27017, db=abcd, collection=atoms)\n" + ] + } + ], + "source": [ + "mongo_url = 'mongodb://localhost:27017'\n", + "mongo_abcd = ABCD.from_url(mongo_url)\n", + "\n", + "print(mongo_abcd)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 197280\n", + " type: opensearch\n", + "CPU times: user 1.66 ms, sys: 1.44 ms, total: 3.1 ms\n", + "Wall time: 8.27 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "os_abcd.print_info()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== ABCD MongoDB ==================\n", + " type: mongodb\n", + " host: localhost\n", + " port: 27017\n", + " db: abcd\n", + "collection: atoms\n", + "number of confs: 197280\n", + " type: mongodb\n", + "CPU times: user 0 ns, sys: 1.46 ms, total: 1.46 ms\n", + "Wall time: 165 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "mongo_abcd.print_info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 21.4 ms, sys: 15.9 ms, total: 37.3 ms\n", + "Wall time: 143 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "{'info': ['cell', 'energy', 'formula', 'n_atoms', 'pbc', 'volume'],\n", + " 'derived': ['elements',\n", + " 'hash',\n", + " 'hash_structure',\n", + " 'modified',\n", + " 'uploaded',\n", + " 'username',\n", + " 'volume'],\n", + " 'arrays': ['forces', 'numbers', 'positions']}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_abcd.properties()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 5.2 ms, sys: 0 ns, total: 5.2 ms\n", + "Wall time: 1.92 s\n" + ] + }, + { + 
"data": { + "text/plain": [ + "{'info': ['volume', 'cell', 'n_atoms', 'pbc', 'formula', 'energy'],\n", + " 'arrays': ['forces', 'numbers', 'positions'],\n", + " 'derived': ['username',\n", + " 'volume',\n", + " 'elements',\n", + " 'modified',\n", + " 'hash_structure',\n", + " 'uploaded',\n", + " 'hash']}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_abcd.properties()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 43.8 ms, sys: 4.53 ms, total: 48.3 ms\n", + "Wall time: 183 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "{'cell': {'count': 197280, 'category': 'info', 'dtype': 'array(float)'},\n", + " 'elements': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(dict)'},\n", + " 'energy': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'forces': {'count': 197280,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'formula': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'hash': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'hash_structure': {'count': 197280,\n", + " 'category': 'derived',\n", + " 'dtype': 'scalar(str)'},\n", + " 'modified': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'n_atoms': {'count': 197280, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'numbers': {'count': 197280, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", + " 'pbc': {'count': 197280, 'category': 'info', 'dtype': 'vector(bool)'},\n", + " 'positions': {'count': 197280,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'uploaded': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'username': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'volume': 
{'count': 197280, 'category': 'derived', 'dtype': 'scalar(float)'}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_abcd.count_properties()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 8.72 ms, sys: 1.29 ms, total: 10 ms\n", + "Wall time: 2.02 s\n" + ] + }, + { + "data": { + "text/plain": [ + "{'n_atoms': {'count': 197280, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'pbc': {'count': 197280, 'category': 'info', 'dtype': 'vector(bool)'},\n", + " 'energy': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'cell': {'count': 197280, 'category': 'info', 'dtype': 'array(float)'},\n", + " 'volume': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(float)'},\n", + " 'formula': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'forces': {'count': 197280,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'numbers': {'count': 197280, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", + " 'positions': {'count': 197280,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'username': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'elements': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(dict)'},\n", + " 'modified': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(date)'},\n", + " 'hash_structure': {'count': 197280,\n", + " 'category': 'derived',\n", + " 'dtype': 'scalar(str)'},\n", + " 'uploaded': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(date)'},\n", + " 'hash': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'}}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_abcd.count_properties()" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.32 ms, sys: 0 ns, total: 1.32 ms\n", + "Wall time: 4.57 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "{306: 33600,\n", + " 210: 32064,\n", + " 114: 30336,\n", + " 222: 7584,\n", + " 126: 5376,\n", + " 252: 3552,\n", + " 177: 3456,\n", + " 237: 3264,\n", + " 141: 3072,\n", + " 138: 2592,\n", + " 249: 2592,\n", + " 195: 2496,\n", + " 147: 2400,\n", + " 180: 2400,\n", + " 144: 2304,\n", + " 198: 2208,\n", + " 258: 2208,\n", + " 174: 2112,\n", + " 135: 2016,\n", + " 231: 2016,\n", + " 243: 2016,\n", + " 276: 1920,\n", + " 300: 1920,\n", + " 150: 1824,\n", + " 225: 1824,\n", + " 279: 1824,\n", + " 129: 1728,\n", + " 291: 1728,\n", + " 207: 1632,\n", + " 255: 1632,\n", + " 261: 1632,\n", + " 228: 1536,\n", + " 303: 1536,\n", + " 162: 1440,\n", + " 183: 1440,\n", + " 201: 1440,\n", + " 282: 1440,\n", + " 168: 1344,\n", + " 171: 1344,\n", + " 186: 1248,\n", + " 204: 1248,\n", + " 246: 1248,\n", + " 270: 1248,\n", + " 153: 1152,\n", + " 132: 1056,\n", + " 159: 1056,\n", + " 189: 960,\n", + " 267: 960,\n", + " 273: 960,\n", + " 288: 960,\n", + " 165: 864,\n", + " 234: 768,\n", + " 240: 768,\n", + " 264: 768,\n", + " 294: 768,\n", + " 297: 768,\n", + " 156: 576,\n", + " 192: 576,\n", + " 285: 480}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_abcd.count_property(\"n_atoms\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import Counter" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 183 ms, sys: 17.7 ms, total: 200 ms\n", + "Wall time: 512 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "Counter({114: 
30336,\n", + " 210: 32064,\n", + " 306: 33600,\n", + " 141: 3072,\n", + " 180: 2400,\n", + " 144: 2304,\n", + " 138: 2592,\n", + " 171: 1344,\n", + " 207: 1632,\n", + " 195: 2496,\n", + " 150: 1824,\n", + " 129: 1728,\n", + " 204: 1248,\n", + " 177: 3456,\n", + " 168: 1344,\n", + " 132: 1056,\n", + " 192: 576,\n", + " 126: 5376,\n", + " 147: 2400,\n", + " 189: 960,\n", + " 135: 2016,\n", + " 174: 2112,\n", + " 165: 864,\n", + " 186: 1248,\n", + " 201: 1440,\n", + " 153: 1152,\n", + " 198: 2208,\n", + " 183: 1440,\n", + " 162: 1440,\n", + " 156: 576,\n", + " 159: 1056,\n", + " 252: 3552,\n", + " 279: 1824,\n", + " 222: 7584,\n", + " 273: 960,\n", + " 300: 1920,\n", + " 240: 768,\n", + " 303: 1536,\n", + " 291: 1728,\n", + " 288: 960,\n", + " 246: 1248,\n", + " 249: 2592,\n", + " 243: 2016,\n", + " 231: 2016,\n", + " 234: 768,\n", + " 237: 3264,\n", + " 270: 1248,\n", + " 264: 768,\n", + " 267: 960,\n", + " 255: 1632,\n", + " 258: 2208,\n", + " 282: 1440,\n", + " 276: 1920,\n", + " 297: 768,\n", + " 261: 1632,\n", + " 225: 1824,\n", + " 228: 1536,\n", + " 285: 480,\n", + " 294: 768})" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "Counter(mongo_abcd.property(\"n_atoms\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 30.4 ms, sys: 0 ns, total: 30.4 ms\n", + "Wall time: 40.9 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "(array([374., 82., 137., 105., 308., 185., 120., 80., 147., 363.]),\n", + " array([-61192.4609375 , -58157.79023438, -55123.11953125, -52088.44882813,\n", + " -49053.778125 , -46019.10742188, -42984.43671875, -39949.76601563,\n", + " -36915.0953125 , -33880.42460937, -30845.75390625]),\n", + " )" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%time\n", + "plt.hist(os_abcd.count_property(\"energy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 639 ms, sys: 17.2 ms, total: 656 ms\n", + "Wall time: 1.06 s\n" + ] + }, + { + "data": { + "text/plain": [ + "(array([41280., 7872., 13344., 10080., 33888., 18720., 11616., 7776.,\n", + " 14208., 38496.]),\n", + " array([-61192.46163388, -58157.79081243, -55123.11999097, -52088.44916952,\n", + " -49053.77834806, -46019.10752661, -42984.43670515, -39949.7658837 ,\n", + " -36915.09506224, -33880.42424079, -30845.75341933]),\n", + " )" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%time\n", + "plt.hist(mongo_abcd.property(\"energy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'range': {'n_atoms': {'lte': '300', 'gte': '200'}}}\n", + "CPU times: user 2.6 ms, sys: 0 ns, total: 2.6 ms\n", + "Wall time: 4.99 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "84768" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "query = 'n_atoms: [200 TO 300]'\n", + "os_abcd.count(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 665 µs, sys: 403 µs, total: 1.07 ms\n", + "Wall time: 195 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "84768" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " 'n_atoms': {'$gte': 200, '$lte': 300},\n", + "}\n", + "mongo_abcd.count(mongo_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'bool': {'must': [{'match': {'formula': {'query': 'C48H28O32Zr6', 'zero_terms_query': 'all'}}}, {'match': {'username': {'query': 'ubuntu', 'zero_terms_query': 'all'}}}]}}\n", + "CPU times: user 1.67 ms, sys: 0 ns, total: 1.67 ms\n", + "Wall time: 4.74 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "30336" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_query = 'formula: C48H28O32Zr6 AND username: ubuntu'\n", + "os_abcd.count(os_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 507 µs, sys: 307 µs, total: 814 µs\n", + "Wall time: 192 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "30336" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " \"formula\": \"C48H28O32Zr6\",\n", + " \"username\": \"ubuntu\",\n", + "\n", + "}\n", + "mongo_abcd.count(mongo_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'term': {'pbc': {'value': 'true'}}}\n", + "CPU times: user 1.61 ms, sys: 0 ns, total: 1.61 ms\n", + "Wall time: 4.05 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "197280" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_query = 'pbc: true'\n", + "os_abcd.count(os_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.12 ms, sys: 677 µs, total: 1.8 ms\n", + "Wall time: 206 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "197280" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " 'pbc': True,\n", + "}\n", + "mongo_abcd.count(mongo_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'range': {'modified': {'lte': '2023-09-06T12:30:32.0000001'}}}\n", + "152\n", + "{'_id': 'qcR-aooBr-e2h_f6LzMC', 'n_atoms': 114, 'cell': [[14.759483662029265, 0.0, 0.0], [7.380258413807584, 12.781786651387147, 0.0], [7.380243655055182, 4.260782501715179, 12.050631347394049]], 'pbc': [True, True, True], 'formula': 'C48H28O32Zr6', 'numbers': [1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 40, 40, 40, 40, 40, 40], 'positions': [[17.166810638040264, 11.566799628342661, 2.3959431306453296], [10.391931260040497, 9.232075241735581, 8.799170748954813], [15.152442318761134, 3.2144705981189303, 0.6236271192356346], [15.428455018627362, 13.198368239182761, 6.757442369774353], [20.968952462595865, 8.354501228588285, 5.937790321351722], [12.821718368988067, 11.860905590260213, 0.764468940894911], [20.164574198879585, 13.449131931085539, 8.500504258460039], [5.203325638335655, 4.037525599970674, 3.6535544413570706], [6.476452578322519, 9.882112891744764, 0.7336632917566172], [14.332783438660714, 4.5739237510789845, 5.763830060388294], [12.20845295758527, 7.975607890442319, 2.7181563401019804], [26.126453831046035, 15.25865575215541, 8.681035572143871], [7.431639790543854, 7.68880010777489, 3.739705967641281], [21.52510600020679, 15.432405681052952, 8.675468268048236], [11.49107468172553, 9.60164215963523, 0.7009214784567679], [18.70674083756121, 4.607625571215378, 5.677858158016438], [17.34676875755316, 10.130528920703508, 4.483049170020872], [2.9330861621787743, 3.3990818416373494, 0.720770788622487], [22.67189915206641, 9.23882668038352, 8.661796350384211], [15.54501705742674, 14.931708899088871, 4.905010140501105], [20.180891240581246, 11.991517760259551, 0.36399634878062614], [13.537900990107627, 8.71222318139275, 4.81955270950513], [13.02082403030889, 3.7798366294145125, 3.8744821907763676], [12.738608267554484, 13.15125952920471, 8.605595280531846], [9.30512423974256, 4.003262597986021, 2.08391144947309], [13.66172535110934, 6.786427797477926, 9.451058899918706], [19.297496722626608, 6.66303528741421, 9.65403361924748], [16.364750768476505, 
11.479160632545504, 9.561987519221761], [16.965060879285595, 12.039276627942046, 3.4048076978088133], [9.872075532499599, 8.868306222192839, 9.697764141289875], [23.093789800187132, 7.8655671087878325, 11.819108411864843], [15.963054660441902, 12.957563889928995, 5.796919712452191], [20.789096532494103, 7.4060484208004835, 5.418655862348822], [20.271838371834924, 15.445983071791856, 11.893743962525676], [20.235785459686173, 13.946169611391733, 9.533785527794883], [5.468638782470736, 4.963675408702813, 4.207234520017469], [13.30165782031905, 14.29182491089219, 11.848895929341783], [13.860085269446175, 5.316906102226919, 5.113333629612867], [12.690982408563865, 7.210540402496312, 3.452020777408105], [26.695527891724396, 14.913992347710204, 9.505586002333807], [6.71805210100351, 7.061748291218562, 4.294338782243351], [20.991757417986378, 15.139143269943215, 9.631524849633491], [19.51051107137078, 14.241804768445284, 11.863723702327984], [19.467115888926717, 5.338714780974876, 5.342773829476735], [17.04442989892783, 11.179573262577135, 4.5377135674632525], [9.75214492091448, 7.909665520118783, 11.856922309351365], [23.22825354947115, 8.805217731024735, 9.58442235219589], [16.00517946832365, 13.88422938185283, 4.718010810191537], [27.480789782170447, 15.480087960917762, 11.617172959535212], [13.389347998136012, 7.596486021826197, 4.594226402069221], [13.123855701056296, 4.872440299211145, 4.040302647886903], [12.64753966896389, 13.717491998222464, 9.492966517597923], [20.97530038245366, 8.335251317448973, 10.600423519640026], [16.502636637988203, 10.85079294806207, 6.900496977690235], [9.019607067373974, 1.3080109208445687, 2.1730394493971033], [12.07299373071628, 8.404520741314311, 10.81235772773731], [18.747600350981866, 12.258841111031488, 10.56065092033455], [7.015172452108002, 5.4262894076561405, 2.347590240376888], [18.78251160034123, 7.0857013283180255, 6.946566060828641], [14.224469007695566, 4.466485050187827, 10.705760127563483], [11.603451262170989, 
5.349451807744306, 2.0915175410667244], [18.543838401175798, 4.375079148751448, 10.585231910862703], [14.160667831243819, 12.098710870007285, 10.795787760496086], [14.45018544044642, 7.011509947840652, 6.832073401261807], [16.477007379156557, 13.350335720646678, 3.479800617715256], [10.582432902092235, 8.377782469927169, 10.803729958995012], [22.44315777833504, 8.370354822180708, 10.648120637633818], [16.567659589367246, 11.689973911016514, 5.702162918040675], [19.752709342190204, 6.556676208576273, 5.912505297587319], [20.937902942140084, 15.937436540790538, 10.817725992408736], [19.488886710337034, 13.540165709815675, 10.668618978356523], [6.371529009087222, 5.8630806488390075, 3.652784530849877], [13.287113670442396, 13.309066380643525, 10.810269250499239], [13.887372022428522, 6.66488149144997, 5.501186858702026], [12.488596287754628, 5.853559681882998, 3.200679859004045], [26.74856971012843, 15.90075025114669, 10.529236108850316], [10.08315746894864, 1.501050200458931, 1.573941101482483], [12.62721678223712, 8.955703669760464, 9.832552292079287], [20.39919235012287, 7.4641321602901884, 11.35141521129136], [15.34144808796582, 10.482235049392207, 7.284668181164747], [19.016384691239136, 8.259448068479994, 7.407999661220768], [13.965549620748263, 5.310837048175544, 11.629395661906845], [18.964757008970206, 11.57454425853002, 9.498296919639932], [6.724169659438005, 4.266531959190234, 1.8722045706136907], [14.989309002358754, 11.848517668179902, 11.724244051601735], [15.200359309885917, 6.086405481062033, 7.400205812261053], [10.683892382055284, 6.184519761996729, 1.7395703404079805], [17.662435367568307, 4.527759521878119, 9.64210249881064], [7.857986041984485, 6.202923889200398, 1.7950802519056281], [14.981497208999828, 4.615121609824532, 9.745747168438765], [17.887354642477394, 12.0129465321045, 11.494403179921116], [17.806453279094754, 6.309886859779258, 7.194991281595532], [17.65884499015476, 10.488278242553303, 7.416379002429795], [12.548166448986402, 
7.693791677648976, 11.776895469518676], [20.34393939120336, 9.206817729894354, 9.922408032378584], [7.857463468905509, 1.74909233925955, 1.8274196418121598], [18.857763430781485, 5.182306608404846, 11.489619301518633], [14.163844810128705, 8.142657329873755, 7.360230728587728], [11.792894920225645, 4.155462409412462, 1.7001921686281716], [13.835964747759567, 11.264146337910587, 9.884038668157677], [18.442882359243793, 7.121400257955315, 9.800715117804442], [16.392129968717537, 10.582139510371382, 9.857218178507331], [9.302433061412287, 3.9652007015997683, 1.0673996577713942], [14.478398449342821, 7.162072897735833, 9.816748197723141], [15.11113884153613, 9.160319431120458, 11.201390619831347], [16.44457564782678, 6.551160139418165, 11.181147839950713], [16.50639593935091, 8.350138210113135, 8.738223430491407], [18.033642210935266, 9.122422759734661, 11.23846663261546], [10.893906920444666, 2.90658841770915, -0.019051977695474183], [14.757549238860731, 9.308555112561303, 9.214183379238824], [18.225880348003468, 9.344784918068, 9.180851999025142], [7.388359137923477, 3.030521667505481, 0.14692076074094484], [9.220983449272417, 6.075785767588413, 0.00657883162539968], [16.440728831136994, 6.2934328881825925, 9.187973671216682]], 'forces': [[0.03505596579759337, 0.7596797943958487, 0.9211044616269563], [0.16925367694563342, -0.01943702713953078, 0.38893903196958485], [0.01574595116377608, 0.17132290092535438, -0.1999552221020049], [0.6313750521363777, 0.05251478601615336, -0.8064430222079316], [-0.09833287623511343, -0.138000887230052, -0.08874934559146055], [0.19781246456634455, -0.9287673780647797, -1.0439826331463689], [0.07987955323902354, 0.3227860853196942, 1.7840037712935266], [0.3716884711227413, 0.4696845328184121, 0.370453313071228], [-0.14715298673081575, -0.28619517081945, -0.2515490388965677], [-0.019879256916508915, 0.1586797572898179, 0.03203954734206577], [0.9120773177492224, -1.200046035662623, 1.3240873743396222], [-0.5694124897336902, 
-0.12506360937075797, -1.3829512429794373], [-0.0348132536409263, 0.07054439117941769, 0.5526864339711696], [-0.41634750794948194, 0.12174380071939654, 1.3701661744387312], [1.1617998448227365, 0.7015918847484289, -1.3710321220485349], [0.19070902021846559, 0.8070636865577138, 0.18004202662121627], [0.22735084256512936, 0.16909786808234928, -0.1733360748519467], [-0.4446707825029187, -0.3087082660123333, -0.10186968600959667], [1.0559542045857038, -0.7522446777152361, 1.3649098107407422], [0.7762546692811232, -1.6189836731314526, -0.7746117342375823], [0.028996903632322036, -0.4549212573567351, -0.15349589870642655], [-0.30669869163043734, -1.474741689618629, -0.22373535702799768], [-0.0288004713360364, 0.7124475973319003, -0.205686211479239], [-0.009475030082091964, -0.9498180296696098, -0.7324728929189461], [0.024624999488289372, -0.06313755663570486, -1.801891965772292], [-0.06351139506345264, -0.159376012078223, -0.050473844173332186], [-0.27379062557521333, 0.16125086064434194, -0.24263708044911655], [0.16200265126520474, 1.5060489867253262, -0.5510645819751359], [0.11499568285511277, -1.536591123473717, -0.46194088373525655], [-0.3656006125998993, -0.17292778363924827, -1.3654302020596978], [1.3289642431824835, 0.5107142001499065, -0.8944297785444403], [-0.025697149587125892, 1.0899791579381328, 0.0022805686754628165], [-0.7954947499036974, 0.11066234526139797, 1.0548332035231311], [-1.8548078439117515, 0.38714131650410166, 2.989239836187592], [0.4506861359111631, 1.1364487657471294, -0.6025277867192889], [-1.2287668027946692, -0.09796263735206197, 0.2020090194615987], [1.0444392611021323, -2.684139342402327, -1.0437898003947796], [-0.07301933526848252, -0.6708481734970433, 0.9851917838418224], [-0.8675355232354481, 0.9083975546282277, -1.1112478391074265], [0.1290364782741685, 1.0164497157641128, 0.008965437397225301], [0.2684093062541945, -0.321198686108611, -1.4872871313266933], [-0.643392389215423, -0.898736890883939, -1.098486424717582], 
[-0.03591728542135892, 0.6514399427150831, 0.9283400606872579], [0.8447910287406576, -1.5317872739661245, -1.3569157361908561], [0.17489879147082638, 0.8299521628403577, -1.920810638125738], [0.3438521635446514, 0.034021868028403804, 0.7196574853586566], [-0.49089716393457716, 1.32019318119886, -0.94368029174169], [-0.8017672136473943, -0.4383576953262203, 0.7052387377464858], [-0.5808395014825368, 3.6184937280564875, 2.299079591174048], [-0.046264433761305436, 2.3374147421900013, 0.42705255387637825], [-0.40195909935689456, 0.5967109224900137, -0.9082941962733758], [0.545840099958743, 0.6951430433146117, 4.287385291900738], [-0.6422225371891145, 0.3670378593750782, 0.6300133957998216], [1.0342078124131395, -1.450620626387826, 0.9064692271123347], [-1.6097744951362183, 0.26370058757071924, 1.6795418988818989], [-1.71670514098886, -1.4817345765222634, 0.997617412133566], [-0.36574665127043626, 0.3369549216858052, -0.5080099136416182], [0.25302742332541905, -0.05521598720041631, 0.07509935788229247], [0.4694140527455162, -0.5513201496485756, -0.6341384740218201], [-0.9458055657745397, -0.43950337898099623, 2.0890692980731704], [-0.10420990428288447, 1.1152443621616988, 0.8470916520222858], [0.303680730512897, -1.6201972339147879, -2.1682659662612322], [-1.573935371256837, -0.7716241121396255, 0.4766830761494265], [0.8572325978731995, -0.38723439044553537, 0.7948401469896356], [-0.2134458014031925, 0.03674209537749023, 0.28632115488382165], [-2.3274044083894974, 0.6730330971277177, -0.3481623612081755], [0.898834592811411, -1.0161931196493315, 0.970241846276607], [-1.0154469854558483, 0.8937227251219462, 1.1335331345430992], [-0.24175827732254024, 1.3876321937466838, 0.534561183763117], [0.6898959355886991, -1.1971756558776325, -2.6977317373963534], [0.3469128049778768, -0.8706239325849023, -0.7535286867304852], [0.5164133078455445, -1.1737287359463862, -0.8774512404325054], [0.07070174270471455, 2.0437350895055317, -2.2527981881306025], [0.3288256070994712, 
-0.3856912142121501, -0.4543299035851946], [0.46220673582211436, -1.5513682828935087, 0.2903063650833339], [-0.38469619721394926, -2.2885375531998284, -0.3758814264732991], [1.9752635218505312, 0.3340079230208498, -1.0452476159967947], [0.8562020996487061, 0.4789574741768384, -0.4695076409076208], [-0.07918329845061843, -0.20133898992740973, 0.4222739011816598], [0.4557039012178522, 0.3706605440016026, 0.8159874720805957], [-0.05972518826358055, -0.10986736010417913, 0.6435188875004655], [0.0024615743516213087, 0.5677505285253259, 0.5866964749241426], [-0.5868764521589592, -0.8929930459899325, 1.0299773189536057], [-0.19385399384171936, 0.13704546522351088, 0.0855992297614977], [0.5114222820137425, 0.9816344052405028, 0.021354041800663897], [-1.066992979728017, 1.3317744591489784, -0.48617301863097473], [0.22686850357582064, -0.6558709822362698, -0.6488395287823175], [0.7797446849745541, 0.08798264257114148, 0.9232755012995165], [0.23828317402856555, 0.1996127511351823, 0.16316684686413324], [1.8341906803324939, 0.7465810512458905, -1.8839816394548647], [0.25763432631605526, -0.3878936213456354, -0.15371958469827013], [-0.09513905164812367, 0.4332483987401102, 1.2322542473847458], [-1.7633197590270517, 0.7325978485419761, -0.6597173528546378], [1.5710907025053915, 0.6496761658138797, -0.2693559865093303], [0.13329885341529846, -0.16092278785630468, -0.8911017423623445], [0.4560890525003599, -0.12238914766138208, 0.02436686071149516], [0.04387896406897804, 0.4783136698968656, 0.7284398601970398], [-0.19204033753543812, -0.5498325092476479, -0.4268478940493581], [-0.04005727604281352, -0.4718303756780979, -0.6626314413966554], [0.5963838781433183, -0.31271095971465634, -1.0048920919648563], [0.2392545368759047, -0.07763909377589129, -0.3403374652590171], [1.0458302280169298, -0.7758582051438556, -0.16412124042933254], [-1.0646861857983383, 0.17288613176490497, 1.4279167553260372], [0.310471014472195, 0.4872791072940909, 0.18266249516014715], [-7.301933526848252e-05, 
-1.2856617204855898, 1.537077576228393], [0.23109436904931635, 0.627365159344662, 1.1682219467816664], [0.06169413921247506, -0.7133752514222126, 0.5373045510423942], [-0.3402017110018982, -0.024654310066530946, -0.18435993759480393], [-0.14411137146241382, 1.2003674235819386, 0.056440860838511554], [-1.1119034704628301, 0.02676570014126608, -0.4762907257775261], [0.753222725431297, 0.3270011521591009, -0.33560457820400924], [-0.7786571082555904, 0.413619053069661, -1.222248027349609], [-0.5719985054876705, -0.1103018765710937, 0.8759049788750947], [-0.24311736255574165, -0.40464795924505575, -1.3254814265784451]], 'energy': -30848.841105643754, 'volume': 2273.382588904185, 'elements': {'1': 28, '6': 48, '8': 32, '40': 6}, 'username': 'ubuntu', 'uploaded': '2023-09-06T12:30:31.588024', 'modified': '2023-09-06T12:30:31.588031', 'hash_structure': '913be2ca3a0e3c584cc728f4c359c850', 'hash': '3768575c18d7b609556913d562d87d36', 'derived': {'arrays_keys': ['forces', 'numbers', 'positions'], 'info_keys': ['n_atoms', 'cell', 'pbc', 'formula', 'energy', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n", + "CPU times: user 117 ms, sys: 0 ns, total: 117 ms\n", + "Wall time: 144 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "os_query = 'modified: [* TO 2023-09-06T12:30:32.0000001]'\n", + "print(os_abcd.count(os_query))\n", + "for items in list(os_abcd.get_items(os_query)):\n", + " print(items)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "152\n", + "{'_id': ObjectId('64f5bd3babc8fc69d4b2938f'), 'n_atoms': 114, 'cell': [[14.759483662029265, 0.0, 0.0], [7.380258413807584, 12.781786651387147, 0.0], [7.380243655055182, 
4.260782501715179, 12.050631347394049]], 'pbc': [True, True, True], 'formula': 'C48H28O32Zr6', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 40, 40, 40, 40, 40, 40], 'positions': [[17.166810638040264, 11.566799628342661, 2.3959431306453296], [10.391931260040497, 9.232075241735581, 8.799170748954813], [15.152442318761134, 3.2144705981189303, 0.6236271192356346], [15.428455018627362, 13.198368239182761, 6.757442369774353], [20.968952462595865, 8.354501228588285, 5.937790321351722], [12.821718368988067, 11.860905590260213, 0.764468940894911], [20.164574198879585, 13.449131931085539, 8.500504258460039], [5.203325638335655, 4.037525599970674, 3.6535544413570706], [6.476452578322519, 9.882112891744764, 0.7336632917566172], [14.332783438660714, 4.5739237510789845, 5.763830060388294], [12.20845295758527, 7.975607890442319, 2.7181563401019804], [26.126453831046035, 15.25865575215541, 8.681035572143871], [7.431639790543854, 7.68880010777489, 3.739705967641281], [21.52510600020679, 15.432405681052952, 8.675468268048236], [11.49107468172553, 9.60164215963523, 0.7009214784567679], [18.70674083756121, 4.607625571215378, 5.677858158016438], [17.34676875755316, 10.130528920703508, 4.483049170020872], [2.9330861621787743, 3.3990818416373494, 0.720770788622487], [22.67189915206641, 9.23882668038352, 8.661796350384211], [15.54501705742674, 14.931708899088871, 4.905010140501105], [20.180891240581246, 11.991517760259551, 0.36399634878062614], [13.537900990107627, 8.71222318139275, 4.81955270950513], [13.02082403030889, 3.7798366294145125, 3.8744821907763676], [12.738608267554484, 13.15125952920471, 8.605595280531846], [9.30512423974256, 4.003262597986021, 2.08391144947309], [13.66172535110934, 
6.786427797477926, 9.451058899918706], [19.297496722626608, 6.66303528741421, 9.65403361924748], [16.364750768476505, 11.479160632545504, 9.561987519221761], [16.965060879285595, 12.039276627942046, 3.4048076978088133], [9.872075532499599, 8.868306222192839, 9.697764141289875], [23.093789800187132, 7.8655671087878325, 11.819108411864843], [15.963054660441902, 12.957563889928995, 5.796919712452191], [20.789096532494103, 7.4060484208004835, 5.418655862348822], [20.271838371834924, 15.445983071791856, 11.893743962525676], [20.235785459686173, 13.946169611391733, 9.533785527794883], [5.468638782470736, 4.963675408702813, 4.207234520017469], [13.30165782031905, 14.29182491089219, 11.848895929341783], [13.860085269446175, 5.316906102226919, 5.113333629612867], [12.690982408563865, 7.210540402496312, 3.452020777408105], [26.695527891724396, 14.913992347710204, 9.505586002333807], [6.71805210100351, 7.061748291218562, 4.294338782243351], [20.991757417986378, 15.139143269943215, 9.631524849633491], [19.51051107137078, 14.241804768445284, 11.863723702327984], [19.467115888926717, 5.338714780974876, 5.342773829476735], [17.04442989892783, 11.179573262577135, 4.5377135674632525], [9.75214492091448, 7.909665520118783, 11.856922309351365], [23.22825354947115, 8.805217731024735, 9.58442235219589], [16.00517946832365, 13.88422938185283, 4.718010810191537], [27.480789782170447, 15.480087960917762, 11.617172959535212], [13.389347998136012, 7.596486021826197, 4.594226402069221], [13.123855701056296, 4.872440299211145, 4.040302647886903], [12.64753966896389, 13.717491998222464, 9.492966517597923], [20.97530038245366, 8.335251317448973, 10.600423519640026], [16.502636637988203, 10.85079294806207, 6.900496977690235], [9.019607067373974, 1.3080109208445687, 2.1730394493971033], [12.07299373071628, 8.404520741314311, 10.81235772773731], [18.747600350981866, 12.258841111031488, 10.56065092033455], [7.015172452108002, 5.4262894076561405, 2.347590240376888], [18.78251160034123, 
7.0857013283180255, 6.946566060828641], [14.224469007695566, 4.466485050187827, 10.705760127563483], [11.603451262170989, 5.349451807744306, 2.0915175410667244], [18.543838401175798, 4.375079148751448, 10.585231910862703], [14.160667831243819, 12.098710870007285, 10.795787760496086], [14.45018544044642, 7.011509947840652, 6.832073401261807], [16.477007379156557, 13.350335720646678, 3.479800617715256], [10.582432902092235, 8.377782469927169, 10.803729958995012], [22.44315777833504, 8.370354822180708, 10.648120637633818], [16.567659589367246, 11.689973911016514, 5.702162918040675], [19.752709342190204, 6.556676208576273, 5.912505297587319], [20.937902942140084, 15.937436540790538, 10.817725992408736], [19.488886710337034, 13.540165709815675, 10.668618978356523], [6.371529009087222, 5.8630806488390075, 3.652784530849877], [13.287113670442396, 13.309066380643525, 10.810269250499239], [13.887372022428522, 6.66488149144997, 5.501186858702026], [12.488596287754628, 5.853559681882998, 3.200679859004045], [26.74856971012843, 15.90075025114669, 10.529236108850316], [10.08315746894864, 1.501050200458931, 1.573941101482483], [12.62721678223712, 8.955703669760464, 9.832552292079287], [20.39919235012287, 7.4641321602901884, 11.35141521129136], [15.34144808796582, 10.482235049392207, 7.284668181164747], [19.016384691239136, 8.259448068479994, 7.407999661220768], [13.965549620748263, 5.310837048175544, 11.629395661906845], [18.964757008970206, 11.57454425853002, 9.498296919639932], [6.724169659438005, 4.266531959190234, 1.8722045706136907], [14.989309002358754, 11.848517668179902, 11.724244051601735], [15.200359309885917, 6.086405481062033, 7.400205812261053], [10.683892382055284, 6.184519761996729, 1.7395703404079805], [17.662435367568307, 4.527759521878119, 9.64210249881064], [7.857986041984485, 6.202923889200398, 1.7950802519056281], [14.981497208999828, 4.615121609824532, 9.745747168438765], [17.887354642477394, 12.0129465321045, 11.494403179921116], [17.806453279094754, 
6.309886859779258, 7.194991281595532], [17.65884499015476, 10.488278242553303, 7.416379002429795], [12.548166448986402, 7.693791677648976, 11.776895469518676], [20.34393939120336, 9.206817729894354, 9.922408032378584], [7.857463468905509, 1.74909233925955, 1.8274196418121598], [18.857763430781485, 5.182306608404846, 11.489619301518633], [14.163844810128705, 8.142657329873755, 7.360230728587728], [11.792894920225645, 4.155462409412462, 1.7001921686281716], [13.835964747759567, 11.264146337910587, 9.884038668157677], [18.442882359243793, 7.121400257955315, 9.800715117804442], [16.392129968717537, 10.582139510371382, 9.857218178507331], [9.302433061412287, 3.9652007015997683, 1.0673996577713942], [14.478398449342821, 7.162072897735833, 9.816748197723141], [15.11113884153613, 9.160319431120458, 11.201390619831347], [16.44457564782678, 6.551160139418165, 11.181147839950713], [16.50639593935091, 8.350138210113135, 8.738223430491407], [18.033642210935266, 9.122422759734661, 11.23846663261546], [10.893906920444666, 2.90658841770915, -0.019051977695474183], [14.757549238860731, 9.308555112561303, 9.214183379238824], [18.225880348003468, 9.344784918068, 9.180851999025142], [7.388359137923477, 3.030521667505481, 0.14692076074094484], [9.220983449272417, 6.075785767588413, 0.00657883162539968], [16.440728831136994, 6.2934328881825925, 9.187973671216682]], 'forces': [[0.03505596579759337, 0.7596797943958487, 0.9211044616269563], [0.16925367694563342, -0.01943702713953078, 0.38893903196958485], [0.01574595116377608, 0.17132290092535438, -0.1999552221020049], [0.6313750521363777, 0.05251478601615336, -0.8064430222079316], [-0.09833287623511343, -0.138000887230052, -0.08874934559146055], [0.19781246456634455, -0.9287673780647797, -1.0439826331463689], [0.07987955323902354, 0.3227860853196942, 1.7840037712935266], [0.3716884711227413, 0.4696845328184121, 0.370453313071228], [-0.14715298673081575, -0.28619517081945, -0.2515490388965677], [-0.019879256916508915, 0.1586797572898179, 
0.03203954734206577], [0.9120773177492224, -1.200046035662623, 1.3240873743396222], [-0.5694124897336902, -0.12506360937075797, -1.3829512429794373], [-0.0348132536409263, 0.07054439117941769, 0.5526864339711696], [-0.41634750794948194, 0.12174380071939654, 1.3701661744387312], [1.1617998448227365, 0.7015918847484289, -1.3710321220485349], [0.19070902021846559, 0.8070636865577138, 0.18004202662121627], [0.22735084256512936, 0.16909786808234928, -0.1733360748519467], [-0.4446707825029187, -0.3087082660123333, -0.10186968600959667], [1.0559542045857038, -0.7522446777152361, 1.3649098107407422], [0.7762546692811232, -1.6189836731314526, -0.7746117342375823], [0.028996903632322036, -0.4549212573567351, -0.15349589870642655], [-0.30669869163043734, -1.474741689618629, -0.22373535702799768], [-0.0288004713360364, 0.7124475973319003, -0.205686211479239], [-0.009475030082091964, -0.9498180296696098, -0.7324728929189461], [0.024624999488289372, -0.06313755663570486, -1.801891965772292], [-0.06351139506345264, -0.159376012078223, -0.050473844173332186], [-0.27379062557521333, 0.16125086064434194, -0.24263708044911655], [0.16200265126520474, 1.5060489867253262, -0.5510645819751359], [0.11499568285511277, -1.536591123473717, -0.46194088373525655], [-0.3656006125998993, -0.17292778363924827, -1.3654302020596978], [1.3289642431824835, 0.5107142001499065, -0.8944297785444403], [-0.025697149587125892, 1.0899791579381328, 0.0022805686754628165], [-0.7954947499036974, 0.11066234526139797, 1.0548332035231311], [-1.8548078439117515, 0.38714131650410166, 2.989239836187592], [0.4506861359111631, 1.1364487657471294, -0.6025277867192889], [-1.2287668027946692, -0.09796263735206197, 0.2020090194615987], [1.0444392611021323, -2.684139342402327, -1.0437898003947796], [-0.07301933526848252, -0.6708481734970433, 0.9851917838418224], [-0.8675355232354481, 0.9083975546282277, -1.1112478391074265], [0.1290364782741685, 1.0164497157641128, 0.008965437397225301], [0.2684093062541945, 
-0.321198686108611, -1.4872871313266933], [-0.643392389215423, -0.898736890883939, -1.098486424717582], [-0.03591728542135892, 0.6514399427150831, 0.9283400606872579], [0.8447910287406576, -1.5317872739661245, -1.3569157361908561], [0.17489879147082638, 0.8299521628403577, -1.920810638125738], [0.3438521635446514, 0.034021868028403804, 0.7196574853586566], [-0.49089716393457716, 1.32019318119886, -0.94368029174169], [-0.8017672136473943, -0.4383576953262203, 0.7052387377464858], [-0.5808395014825368, 3.6184937280564875, 2.299079591174048], [-0.046264433761305436, 2.3374147421900013, 0.42705255387637825], [-0.40195909935689456, 0.5967109224900137, -0.9082941962733758], [0.545840099958743, 0.6951430433146117, 4.287385291900738], [-0.6422225371891145, 0.3670378593750782, 0.6300133957998216], [1.0342078124131395, -1.450620626387826, 0.9064692271123347], [-1.6097744951362183, 0.26370058757071924, 1.6795418988818989], [-1.71670514098886, -1.4817345765222634, 0.997617412133566], [-0.36574665127043626, 0.3369549216858052, -0.5080099136416182], [0.25302742332541905, -0.05521598720041631, 0.07509935788229247], [0.4694140527455162, -0.5513201496485756, -0.6341384740218201], [-0.9458055657745397, -0.43950337898099623, 2.0890692980731704], [-0.10420990428288447, 1.1152443621616988, 0.8470916520222858], [0.303680730512897, -1.6201972339147879, -2.1682659662612322], [-1.573935371256837, -0.7716241121396255, 0.4766830761494265], [0.8572325978731995, -0.38723439044553537, 0.7948401469896356], [-0.2134458014031925, 0.03674209537749023, 0.28632115488382165], [-2.3274044083894974, 0.6730330971277177, -0.3481623612081755], [0.898834592811411, -1.0161931196493315, 0.970241846276607], [-1.0154469854558483, 0.8937227251219462, 1.1335331345430992], [-0.24175827732254024, 1.3876321937466838, 0.534561183763117], [0.6898959355886991, -1.1971756558776325, -2.6977317373963534], [0.3469128049778768, -0.8706239325849023, -0.7535286867304852], [0.5164133078455445, -1.1737287359463862, 
-0.8774512404325054], [0.07070174270471455, 2.0437350895055317, -2.2527981881306025], [0.3288256070994712, -0.3856912142121501, -0.4543299035851946], [0.46220673582211436, -1.5513682828935087, 0.2903063650833339], [-0.38469619721394926, -2.2885375531998284, -0.3758814264732991], [1.9752635218505312, 0.3340079230208498, -1.0452476159967947], [0.8562020996487061, 0.4789574741768384, -0.4695076409076208], [-0.07918329845061843, -0.20133898992740973, 0.4222739011816598], [0.4557039012178522, 0.3706605440016026, 0.8159874720805957], [-0.05972518826358055, -0.10986736010417913, 0.6435188875004655], [0.0024615743516213087, 0.5677505285253259, 0.5866964749241426], [-0.5868764521589592, -0.8929930459899325, 1.0299773189536057], [-0.19385399384171936, 0.13704546522351088, 0.0855992297614977], [0.5114222820137425, 0.9816344052405028, 0.021354041800663897], [-1.066992979728017, 1.3317744591489784, -0.48617301863097473], [0.22686850357582064, -0.6558709822362698, -0.6488395287823175], [0.7797446849745541, 0.08798264257114148, 0.9232755012995165], [0.23828317402856555, 0.1996127511351823, 0.16316684686413324], [1.8341906803324939, 0.7465810512458905, -1.8839816394548647], [0.25763432631605526, -0.3878936213456354, -0.15371958469827013], [-0.09513905164812367, 0.4332483987401102, 1.2322542473847458], [-1.7633197590270517, 0.7325978485419761, -0.6597173528546378], [1.5710907025053915, 0.6496761658138797, -0.2693559865093303], [0.13329885341529846, -0.16092278785630468, -0.8911017423623445], [0.4560890525003599, -0.12238914766138208, 0.02436686071149516], [0.04387896406897804, 0.4783136698968656, 0.7284398601970398], [-0.19204033753543812, -0.5498325092476479, -0.4268478940493581], [-0.04005727604281352, -0.4718303756780979, -0.6626314413966554], [0.5963838781433183, -0.31271095971465634, -1.0048920919648563], [0.2392545368759047, -0.07763909377589129, -0.3403374652590171], [1.0458302280169298, -0.7758582051438556, -0.16412124042933254], [-1.0646861857983383, 0.17288613176490497, 
1.4279167553260372], [0.310471014472195, 0.4872791072940909, 0.18266249516014715], [-7.301933526848252e-05, -1.2856617204855898, 1.537077576228393], [0.23109436904931635, 0.627365159344662, 1.1682219467816664], [0.06169413921247506, -0.7133752514222126, 0.5373045510423942], [-0.3402017110018982, -0.024654310066530946, -0.18435993759480393], [-0.14411137146241382, 1.2003674235819386, 0.056440860838511554], [-1.1119034704628301, 0.02676570014126608, -0.4762907257775261], [0.753222725431297, 0.3270011521591009, -0.33560457820400924], [-0.7786571082555904, 0.413619053069661, -1.222248027349609], [-0.5719985054876705, -0.1103018765710937, 0.8759049788750947], [-0.24311736255574165, -0.40464795924505575, -1.3254814265784451]], 'energy': -30848.841105643754, 'volume': 2273.382588904185, 'elements': {'1': 28, '6': 48, '8': 32, '40': 6}, 'username': 'ubuntu', 'uploaded': datetime.datetime(2023, 9, 4, 11, 19, 23, 722000), 'modified': datetime.datetime(2023, 9, 4, 11, 19, 23, 722000), 'hash_structure': '913be2ca3a0e3c584cc728f4c359c850', 'hash': '919ff4a70b553ddf39772a76d202ebce', 'derived': {'arrays_keys': ['numbers', 'positions', 'forces'], 'info_keys': ['formula', 'pbc', 'n_atoms', 'energy', 'cell', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n", + "CPU times: user 16.2 ms, sys: 1.81 ms, total: 18 ms\n", + "Wall time: 586 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " 'modified': {'$lt': datetime.fromisoformat('2023-09-04T11:19:24.310')}\n", + "}\n", + "print(mongo_abcd.count(mongo_query))\n", + "for items in list(mongo_abcd.get_items(mongo_query)):\n", + " print(items)\n", + " break" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv_9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": 
"python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 8aacf4b66784273f2a20467ac3754de6441b137c Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 7 Sep 2023 11:39:11 +0000 Subject: [PATCH 045/112] Enable uploading list of extra info --- abcd/backends/atoms_opensearch.py | 14 +++++++++++--- abcd/backends/atoms_pymongo.py | 8 ++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index b347f40e..a8e50506 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -388,7 +388,7 @@ def save_bulk(self, actions: Iterable): def push( self, atoms: Union[Atoms, Iterable], - extra_info: Union[dict, str, None] = None, + extra_info: Union[dict, str, list, None] = None, store_calc: bool = True, ): """ @@ -406,6 +406,10 @@ def push( """ if extra_info and isinstance(extra_info, str): extra_info = extras.parser.parse(extra_info) # type: ignore + if extra_info and isinstance(extra_info, list): + for i, info in enumerate(extra_info): + if isinstance(info, str): + extra_info[i] = extras.parser.parse(info) if isinstance(atoms, Atoms): data = AtomsModel.from_atoms( @@ -419,12 +423,16 @@ def push( elif isinstance(atoms, Generator) or isinstance(atoms, list): actions = [] - for item in atoms: + for i, item in enumerate(atoms): + if isinstance(extra_info, list): + info = extra_info[i] + else: + info = extra_info data = AtomsModel.from_atoms( self.client, self.index_name, item, - extra_info=extra_info, # type: ignore + extra_info=info, # type: ignore store_calc=store_calc, ) actions.append(data.data) diff --git a/abcd/backends/atoms_pymongo.py b/abcd/backends/atoms_pymongo.py index da230fb6..ac702fcb 100644 --- a/abcd/backends/atoms_pymongo.py +++ b/abcd/backends/atoms_pymongo.py @@ -235,9 +235,13 @@ def push(self, atoms: Union[Atoms, Iterable], 
extra_info=None, store_calc=True): # self.collection.insert_one(data) elif isinstance(atoms, types.GeneratorType) or isinstance(atoms, list): - for item in atoms: + for i, item in enumerate(atoms): + if isinstance(extra_info, list): + info = extra_info[i] + else: + info = extra_info data = AtomsModel.from_atoms( - self.collection, item, extra_info=extra_info, store_calc=store_calc + self.collection, item, extra_info=info, store_calc=store_calc ) data.save() From 3d11150fa3f15b2390160425e02c3f52c0b02380 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Thu, 7 Sep 2023 15:56:48 +0000 Subject: [PATCH 046/112] Extend benchmarking --- tutorials/abcd_benchmarking.ipynb | 750 +++++++++++++++++++++--------- 1 file changed, 536 insertions(+), 214 deletions(-) diff --git a/tutorials/abcd_benchmarking.ipynb b/tutorials/abcd_benchmarking.ipynb index fa8e9ece..aa0fdc3f 100644 --- a/tutorials/abcd_benchmarking.ipynb +++ b/tutorials/abcd_benchmarking.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Benchmarking OpenSearch performance against MongoDB " + ] + }, { "cell_type": "code", "execution_count": 1, @@ -50,6 +57,13 @@ "print(mongo_abcd)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data in each database consists of ~400,000 structures, made up of 2055 structures repeated 192 times. Of these, half were uploaded only as atoms objects, and half were uploaded with extra information added from a csv file." 
+ ] + }, { "cell_type": "code", "execution_count": 4, @@ -65,10 +79,10 @@ " port: 9200\n", " db: abcd\n", " index: atoms\n", - "number of confs: 197280\n", + "number of confs: 394560\n", " type: opensearch\n", - "CPU times: user 1.66 ms, sys: 1.44 ms, total: 3.1 ms\n", - "Wall time: 8.27 ms\n" + "CPU times: user 0 ns, sys: 2.34 ms, total: 2.34 ms\n", + "Wall time: 7.86 ms\n" ] } ], @@ -92,10 +106,10 @@ " port: 27017\n", " db: abcd\n", "collection: atoms\n", - "number of confs: 197280\n", + "number of confs: 394560\n", " type: mongodb\n", - "CPU times: user 0 ns, sys: 1.46 ms, total: 1.46 ms\n", - "Wall time: 165 ms\n" + "CPU times: user 0 ns, sys: 2.73 ms, total: 2.73 ms\n", + "Wall time: 323 ms\n" ] } ], @@ -113,14 +127,70 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 21.4 ms, sys: 15.9 ms, total: 37.3 ms\n", - "Wall time: 143 ms\n" + "CPU times: user 67.1 ms, sys: 8.17 ms, total: 75.3 ms\n", + "Wall time: 207 ms\n" ] }, { "data": { "text/plain": [ - "{'info': ['cell', 'energy', 'formula', 'n_atoms', 'pbc', 'volume'],\n", + "{'info': ['1aromatico-up',\n", + " '2D',\n", + " '2aromatici-up',\n", + " '5-m-rings',\n", + " '5m-ring-leg2met',\n", + " '6m-rings',\n", + " 'Accessible Surface Area',\n", + " 'Band_gap',\n", + " 'CN-M',\n", + " 'COOM',\n", + " 'Cell volume',\n", + " 'Crit: metal',\n", + " 'Crit: pi-pi stacking',\n", + " 'Crit: redox active linker',\n", + " 'Crit: redox match',\n", + " 'Criteria#',\n", + " 'Density',\n", + " 'Dos at CBM',\n", + " 'Dos at Fermi energy',\n", + " 'Dos at VBM',\n", + " 'HSE band gap',\n", + " 'LCD',\n", + " 'M-C-C-TRIANG',\n", + " 'M-H2O-M',\n", + " 'M-N-NM-N-M',\n", + " 'M-h2o',\n", + " 'MOF_name',\n", + " 'Metal',\n", + " 'Metal 2',\n", + " 'Metal 3',\n", + " 'Metal density',\n", + " 'Metals number',\n", + " 'Multiplier_Sum',\n", + " 'N3--NCN up',\n", + " 'PLD',\n", + " 'Space_group',\n", + " 'Space_group#',\n", + " 'Temp',\n", + " 'Volume Fraction',\n", + " 'Year',\n", + " 'Zprime',\n", + " 
'author',\n", + " 'benzene',\n", + " 'cell',\n", + " 'energy',\n", + " 'formula',\n", + " 'metal-N',\n", + " 'metal-O',\n", + " 'metal-S',\n", + " 'metal-halogen',\n", + " 'n_atoms',\n", + " 'pbc',\n", + " 'pyridine',\n", + " 'pyrimidine',\n", + " 'units',\n", + " 'volume',\n", + " 'without ions'],\n", " 'derived': ['elements',\n", " 'hash',\n", " 'hash_structure',\n", @@ -150,22 +220,78 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 5.2 ms, sys: 0 ns, total: 5.2 ms\n", - "Wall time: 1.92 s\n" + "CPU times: user 4.59 ms, sys: 3.13 ms, total: 7.72 ms\n", + "Wall time: 6.13 s\n" ] }, { "data": { "text/plain": [ - "{'info': ['volume', 'cell', 'n_atoms', 'pbc', 'formula', 'energy'],\n", + "{'info': ['Dos at Fermi energy',\n", + " 'pbc',\n", + " 'Metal 3',\n", + " 'Multiplier_Sum',\n", + " '1aromatico-up',\n", + " 'Crit: redox match',\n", + " 'volume',\n", + " 'without ions',\n", + " 'Density',\n", + " 'metal-halogen',\n", + " 'MOF_name',\n", + " 'pyrimidine',\n", + " 'Dos at VBM',\n", + " 'COOM',\n", + " 'Metals number',\n", + " 'Crit: redox active linker',\n", + " 'Accessible Surface Area',\n", + " 'Metal density',\n", + " 'M-C-C-TRIANG',\n", + " 'HSE band gap',\n", + " 'metal-O',\n", + " 'M-h2o',\n", + " 'Dos at CBM',\n", + " 'PLD',\n", + " 'metal-S',\n", + " '2D',\n", + " 'energy',\n", + " 'Band_gap',\n", + " 'M-N-NM-N-M',\n", + " 'N3--NCN up',\n", + " 'Space_group',\n", + " 'cell',\n", + " 'Crit: pi-pi stacking',\n", + " '5m-ring-leg2met',\n", + " 'LCD',\n", + " 'Volume Fraction',\n", + " 'Criteria#',\n", + " 'formula',\n", + " 'Zprime',\n", + " 'Crit: metal',\n", + " '5-m-rings',\n", + " 'M-H2O-M',\n", + " 'Cell volume',\n", + " 'Metal 2',\n", + " 'author',\n", + " '2aromatici-up',\n", + " 'benzene',\n", + " 'metal-N',\n", + " 'CN-M',\n", + " '6m-rings',\n", + " 'units',\n", + " 'n_atoms',\n", + " 'Year',\n", + " 'Space_group#',\n", + " 'pyridine',\n", + " 'Temp',\n", + " 'Metal'],\n", " 'arrays': ['forces', 'numbers', 'positions'],\n", 
" 'derived': ['username',\n", " 'volume',\n", - " 'elements',\n", - " 'modified',\n", - " 'hash_structure',\n", " 'uploaded',\n", - " 'hash']}" + " 'hash',\n", + " 'modified',\n", + " 'elements',\n", + " 'hash_structure']}" ] }, "execution_count": 7, @@ -187,34 +313,111 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 43.8 ms, sys: 4.53 ms, total: 48.3 ms\n", - "Wall time: 183 ms\n" + "CPU times: user 98.5 ms, sys: 22.1 ms, total: 121 ms\n", + "Wall time: 446 ms\n" ] }, { "data": { "text/plain": [ - "{'cell': {'count': 197280, 'category': 'info', 'dtype': 'array(float)'},\n", - " 'elements': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(dict)'},\n", - " 'energy': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", - " 'forces': {'count': 197280,\n", + "{'1aromatico-up': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '2D': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '2aromatici-up': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '5-m-rings': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '5m-ring-leg2met': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '6m-rings': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Accessible Surface Area': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Band_gap': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'CN-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'COOM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Cell volume': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Crit: metal': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Crit: pi-pi stacking': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 
'scalar(str)'},\n", + " 'Crit: redox active linker': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Crit: redox match': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Criteria#': {'count': 197280, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'Density': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Dos at CBM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'Dos at Fermi energy': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(float)'},\n", + " 'Dos at VBM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'HSE band gap': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'LCD': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-C-C-TRIANG': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-H2O-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-N-NM-N-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-h2o': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'MOF_name': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal 2': {'count': 9034, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal 3': {'count': 409, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal density': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Metals number': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Multiplier_Sum': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'N3--NCN up': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'PLD': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 
'Space_group': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Space_group#': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Temp': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Volume Fraction': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Year': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Zprime': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'author': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'benzene': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'cell': {'count': 394560, 'category': 'info', 'dtype': 'array(float)'},\n", + " 'elements': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(dict)'},\n", + " 'energy': {'count': 394560, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'forces': {'count': 394560,\n", " 'category': 'arrays',\n", " 'dtype': 'array(float, N x 3)'},\n", - " 'formula': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", - " 'hash': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", - " 'hash_structure': {'count': 197280,\n", + " 'formula': {'count': 394560, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'hash': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'hash_structure': {'count': 394560,\n", " 'category': 'derived',\n", " 'dtype': 'scalar(str)'},\n", - " 'modified': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", - " 'n_atoms': {'count': 197280, 'category': 'info', 'dtype': 'scalar(int)'},\n", - " 'numbers': {'count': 197280, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", - " 'pbc': {'count': 197280, 'category': 'info', 'dtype': 'vector(bool)'},\n", - " 'positions': {'count': 197280,\n", + " 'metal-N': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-O': {'count': 197280, 
'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-S': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-halogen': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'modified': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'n_atoms': {'count': 394560, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'numbers': {'count': 394560, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", + " 'pbc': {'count': 394560, 'category': 'info', 'dtype': 'vector(bool)'},\n", + " 'positions': {'count': 394560,\n", " 'category': 'arrays',\n", " 'dtype': 'array(float, N x 3)'},\n", - " 'uploaded': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", - " 'username': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", - " 'volume': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(float)'}}" + " 'pyridine': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'pyrimidine': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'units': {'count': 197280, 'category': 'info', 'dtype': 'scalar(dict)'},\n", + " 'uploaded': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'username': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'volume': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(float)'},\n", + " 'without ions': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'}}" ] }, "execution_count": 8, @@ -236,34 +439,111 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 8.72 ms, sys: 1.29 ms, total: 10 ms\n", - "Wall time: 2.02 s\n" + "CPU times: user 78.6 ms, sys: 10 ms, total: 88.6 ms\n", + "Wall time: 21.7 s\n" ] }, { "data": { "text/plain": [ - "{'n_atoms': {'count': 197280, 'category': 'info', 'dtype': 'scalar(int)'},\n", - " 'pbc': {'count': 197280, 'category': 'info', 'dtype': 'vector(bool)'},\n", - " 'energy': 
{'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", - " 'cell': {'count': 197280, 'category': 'info', 'dtype': 'array(float)'},\n", - " 'volume': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(float)'},\n", - " 'formula': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", - " 'forces': {'count': 197280,\n", + "{'Dos at Fermi energy': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(float)'},\n", + " 'pbc': {'count': 394560, 'category': 'info', 'dtype': 'vector(bool)'},\n", + " 'Metal 3': {'count': 409, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Multiplier_Sum': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '1aromatico-up': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Crit: redox match': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'volume': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(float)'},\n", + " 'without ions': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Density': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-halogen': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'MOF_name': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'pyrimidine': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Dos at VBM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'COOM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metals number': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Crit: redox active linker': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Accessible Surface Area': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Metal density': {'count': 
197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'M-C-C-TRIANG': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'HSE band gap': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-O': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-h2o': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Dos at CBM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'PLD': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-S': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '2D': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'energy': {'count': 394560, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'Space_group': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-N-NM-N-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'N3--NCN up': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Band_gap': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'cell': {'count': 394560, 'category': 'info', 'dtype': 'array(float)'},\n", + " 'Crit: pi-pi stacking': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '5m-ring-leg2met': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'LCD': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Volume Fraction': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Criteria#': {'count': 197280, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'formula': {'count': 394560, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Zprime': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Crit: metal': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '5-m-rings': {'count': 
197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-H2O-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Cell volume': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal 2': {'count': 9034, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'author': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '2aromatici-up': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'benzene': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-N': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'CN-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '6m-rings': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'units': {'count': 197280, 'category': 'info', 'dtype': 'scalar(dict)'},\n", + " 'Year': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'n_atoms': {'count': 394560, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'Space_group#': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'pyridine': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Temp': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'positions': {'count': 394560,\n", " 'category': 'arrays',\n", " 'dtype': 'array(float, N x 3)'},\n", - " 'numbers': {'count': 197280, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", - " 'positions': {'count': 197280,\n", + " 'forces': {'count': 394560,\n", " 'category': 'arrays',\n", " 'dtype': 'array(float, N x 3)'},\n", - " 'username': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'},\n", - " 'elements': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(dict)'},\n", - " 'modified': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(date)'},\n", - " 
'hash_structure': {'count': 197280,\n", + " 'numbers': {'count': 394560, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", + " 'modified': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(date)'},\n", + " 'uploaded': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(date)'},\n", + " 'hash_structure': {'count': 394560,\n", " 'category': 'derived',\n", " 'dtype': 'scalar(str)'},\n", - " 'uploaded': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(date)'},\n", - " 'hash': {'count': 197280, 'category': 'derived', 'dtype': 'scalar(str)'}}" + " 'username': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'hash': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'elements': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(dict)'}}" ] }, "execution_count": 9, @@ -285,72 +565,72 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.32 ms, sys: 0 ns, total: 1.32 ms\n", - "Wall time: 4.57 ms\n" + "CPU times: user 3.18 ms, sys: 630 µs, total: 3.81 ms\n", + "Wall time: 8.16 ms\n" ] }, { "data": { "text/plain": [ - "{306: 33600,\n", - " 210: 32064,\n", - " 114: 30336,\n", - " 222: 7584,\n", - " 126: 5376,\n", - " 252: 3552,\n", - " 177: 3456,\n", - " 237: 3264,\n", - " 141: 3072,\n", - " 138: 2592,\n", - " 249: 2592,\n", - " 195: 2496,\n", - " 147: 2400,\n", - " 180: 2400,\n", - " 144: 2304,\n", - " 198: 2208,\n", - " 258: 2208,\n", - " 174: 2112,\n", - " 135: 2016,\n", - " 231: 2016,\n", - " 243: 2016,\n", - " 276: 1920,\n", - " 300: 1920,\n", - " 150: 1824,\n", - " 225: 1824,\n", - " 279: 1824,\n", - " 129: 1728,\n", - " 291: 1728,\n", - " 207: 1632,\n", - " 255: 1632,\n", - " 261: 1632,\n", - " 228: 1536,\n", - " 303: 1536,\n", - " 162: 1440,\n", - " 183: 1440,\n", - " 201: 1440,\n", - " 282: 1440,\n", - " 168: 1344,\n", - " 171: 1344,\n", - " 186: 1248,\n", - " 204: 1248,\n", - " 246: 1248,\n", - " 270: 1248,\n", - " 153: 1152,\n", - " 132: 1056,\n", - " 159: 
1056,\n", - " 189: 960,\n", - " 267: 960,\n", - " 273: 960,\n", - " 288: 960,\n", - " 165: 864,\n", - " 234: 768,\n", - " 240: 768,\n", - " 264: 768,\n", - " 294: 768,\n", - " 297: 768,\n", - " 156: 576,\n", - " 192: 576,\n", - " 285: 480}" + "{306: 67200,\n", + " 210: 64128,\n", + " 114: 60672,\n", + " 222: 15168,\n", + " 126: 10752,\n", + " 252: 7104,\n", + " 177: 6912,\n", + " 237: 6528,\n", + " 141: 6144,\n", + " 138: 5184,\n", + " 249: 5184,\n", + " 195: 4992,\n", + " 147: 4800,\n", + " 180: 4800,\n", + " 144: 4608,\n", + " 198: 4416,\n", + " 258: 4416,\n", + " 174: 4224,\n", + " 135: 4032,\n", + " 231: 4032,\n", + " 243: 4032,\n", + " 276: 3840,\n", + " 300: 3840,\n", + " 150: 3648,\n", + " 225: 3648,\n", + " 279: 3648,\n", + " 129: 3456,\n", + " 291: 3456,\n", + " 207: 3264,\n", + " 255: 3264,\n", + " 261: 3264,\n", + " 228: 3072,\n", + " 303: 3072,\n", + " 162: 2880,\n", + " 183: 2880,\n", + " 201: 2880,\n", + " 282: 2880,\n", + " 168: 2688,\n", + " 171: 2688,\n", + " 186: 2496,\n", + " 204: 2496,\n", + " 246: 2496,\n", + " 270: 2496,\n", + " 153: 2304,\n", + " 132: 2112,\n", + " 159: 2112,\n", + " 189: 1920,\n", + " 267: 1920,\n", + " 273: 1920,\n", + " 288: 1920,\n", + " 165: 1728,\n", + " 234: 1536,\n", + " 240: 1536,\n", + " 264: 1536,\n", + " 294: 1536,\n", + " 297: 1536,\n", + " 156: 1152,\n", + " 192: 1152,\n", + " 285: 960}" ] }, "execution_count": 10, @@ -381,72 +661,72 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 183 ms, sys: 17.7 ms, total: 200 ms\n", - "Wall time: 512 ms\n" + "CPU times: user 328 ms, sys: 57.3 ms, total: 385 ms\n", + "Wall time: 1.01 s\n" ] }, { "data": { "text/plain": [ - "Counter({114: 30336,\n", - " 210: 32064,\n", - " 306: 33600,\n", - " 141: 3072,\n", - " 180: 2400,\n", - " 144: 2304,\n", - " 138: 2592,\n", - " 171: 1344,\n", - " 207: 1632,\n", - " 195: 2496,\n", - " 150: 1824,\n", - " 129: 1728,\n", - " 204: 1248,\n", - " 177: 3456,\n", - " 168: 1344,\n", - " 132: 1056,\n", - " 192: 576,\n", - 
" 126: 5376,\n", - " 147: 2400,\n", - " 189: 960,\n", - " 135: 2016,\n", - " 174: 2112,\n", - " 165: 864,\n", - " 186: 1248,\n", - " 201: 1440,\n", - " 153: 1152,\n", - " 198: 2208,\n", - " 183: 1440,\n", - " 162: 1440,\n", - " 156: 576,\n", - " 159: 1056,\n", - " 252: 3552,\n", - " 279: 1824,\n", - " 222: 7584,\n", - " 273: 960,\n", - " 300: 1920,\n", - " 240: 768,\n", - " 303: 1536,\n", - " 291: 1728,\n", - " 288: 960,\n", - " 246: 1248,\n", - " 249: 2592,\n", - " 243: 2016,\n", - " 231: 2016,\n", - " 234: 768,\n", - " 237: 3264,\n", - " 270: 1248,\n", - " 264: 768,\n", - " 267: 960,\n", - " 255: 1632,\n", - " 258: 2208,\n", - " 282: 1440,\n", - " 276: 1920,\n", - " 297: 768,\n", - " 261: 1632,\n", - " 225: 1824,\n", - " 228: 1536,\n", - " 285: 480,\n", - " 294: 768})" + "Counter({114: 60672,\n", + " 210: 64128,\n", + " 306: 67200,\n", + " 141: 6144,\n", + " 180: 4800,\n", + " 144: 4608,\n", + " 138: 5184,\n", + " 171: 2688,\n", + " 207: 3264,\n", + " 195: 4992,\n", + " 150: 3648,\n", + " 129: 3456,\n", + " 204: 2496,\n", + " 177: 6912,\n", + " 168: 2688,\n", + " 132: 2112,\n", + " 192: 1152,\n", + " 126: 10752,\n", + " 147: 4800,\n", + " 189: 1920,\n", + " 135: 4032,\n", + " 174: 4224,\n", + " 165: 1728,\n", + " 186: 2496,\n", + " 201: 2880,\n", + " 153: 2304,\n", + " 198: 4416,\n", + " 183: 2880,\n", + " 162: 2880,\n", + " 156: 1152,\n", + " 159: 2112,\n", + " 252: 7104,\n", + " 279: 3648,\n", + " 222: 15168,\n", + " 273: 1920,\n", + " 300: 3840,\n", + " 240: 1536,\n", + " 303: 3072,\n", + " 291: 3456,\n", + " 288: 1920,\n", + " 246: 2496,\n", + " 249: 5184,\n", + " 243: 4032,\n", + " 231: 4032,\n", + " 234: 1536,\n", + " 237: 6528,\n", + " 270: 2496,\n", + " 264: 1536,\n", + " 267: 1920,\n", + " 255: 3264,\n", + " 258: 4416,\n", + " 282: 2880,\n", + " 276: 3840,\n", + " 297: 1536,\n", + " 261: 3264,\n", + " 225: 3648,\n", + " 228: 3072,\n", + " 285: 960,\n", + " 294: 1536})" ] }, "execution_count": 12, @@ -468,8 +748,8 @@ "name": "stdout", "output_type": 
"stream", "text": [ - "CPU times: user 30.4 ms, sys: 0 ns, total: 30.4 ms\n", - "Wall time: 40.9 ms\n" + "CPU times: user 37.5 ms, sys: 86 µs, total: 37.5 ms\n", + "Wall time: 52.8 ms\n" ] }, { @@ -511,15 +791,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 639 ms, sys: 17.2 ms, total: 656 ms\n", - "Wall time: 1.06 s\n" + "CPU times: user 1.16 s, sys: 46.4 ms, total: 1.2 s\n", + "Wall time: 2.03 s\n" ] }, { "data": { "text/plain": [ - "(array([41280., 7872., 13344., 10080., 33888., 18720., 11616., 7776.,\n", - " 14208., 38496.]),\n", + "(array([82560., 15744., 26688., 20160., 67776., 37440., 23232., 15552.,\n", + " 28416., 76992.]),\n", " array([-61192.46163388, -58157.79081243, -55123.11999097, -52088.44916952,\n", " -49053.77834806, -46019.10752661, -42984.43670515, -39949.7658837 ,\n", " -36915.09506224, -33880.42424079, -30845.75341933]),\n", @@ -532,7 +812,7 @@ }, { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAGdCAYAAAACMjetAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4h0lEQVR4nO3df1xW9f3/8SegXKB2XfgLkESl2VKmiWLi5arbXHy8crTNRZs6Zmak04FLKX+wHDq3pdOp+Zu1Srx9yqXu9skKDHOYuunlL9RSFNeWhs0utI/BpXwUFM73j76cvAJPXIoh+bjfbue2Xef9Ou/zPu8d5LnDOecKMAzDEAAAAOoV2NQDAAAAuJkRlgAAACwQlgAAACwQlgAAACwQlgAAACwQlgAAACwQlgAAACwQlgAAACy0aOoBNKWamhqdOnVKt912mwICApp6OAAAoAEMw9C5c+cUFRWlwMAbf93nlg5Lp06dUnR0dFMPAwAAXIOTJ0+qc+fON3w/t3RYuu222yR9Ntl2u72JRwMAABrC6/UqOjra/D1+o93SYan2T292u52wBABAM/NV3ULDDd4AAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWWjT1AL6uuk3Pa+oh+O3E3KSmHgIAADcdriwBAABYICwBAABYICwBAABYICwBAABYICwBAABYICwBAABY8CssVVdX69e//rViYmIUGhqqb3zjG/rtb38rwzDMGsMwlJWVpU6dOik0NFSJiYl6//33ffo5e/asUlJSZLfbFRYWptTUVJ0/f96n5r333tN9992nkJAQRUdHa968eXXGs379evXo0UMhISHq3bu3Nm7c6M/hAAAAfCm/wtIf/vAHrVy5UsuWLdPRo0f1hz/8QfPmzdPSpUvNmnnz5mnJkiXKzs7W7t271bp1a7lcLl28eNGsSUlJUVFRkTZv3qzc3Fxt375d48aNM9u9X
q+GDBmirl27qrCwUPPnz9esWbP0/PPPmzU7d+7UyJEjlZqaqgMHDmjYsGEaNmyYDh8+fD3zAQAA4CPAuPKy0Jd46KGHFBERoRdffNFcl5ycrNDQUL388ssyDENRUVF66qmn9PTTT0uSysvLFRERoZycHI0YMUJHjx5VbGys9u7dq/79+0uS8vPz9b3vfU8fffSRoqKitHLlSj3zzDPyeDwKDg6WJE2fPl0bNmxQcXGxJGn48OGqqKhQbm6uOZaBAwcqLi5O2dnZDToer9crh8Oh8vJy2e32hk5Dg/BSSgAAbowb+fu7Pn5dWRo0aJAKCgr0z3/+U5L07rvv6h//+IeGDh0qSTp+/Lg8Ho8SExPNbRwOhxISEuR2uyVJbrdbYWFhZlCSpMTERAUGBmr37t1mzf33328GJUlyuVw6duyYPv30U7Pmyv3U1tTupz6VlZXyer0+CwAAgBW/vu5k+vTp8nq96tGjh4KCglRdXa3f//73SklJkSR5PB5JUkREhM92ERERZpvH41F4eLjvIFq0ULt27XxqYmJi6vRR29a2bVt5PB7L/dRnzpw5+s1vfuPPIQMAgFucX1eW1q1bp1deeUVr1qzR/v37tXr1av3xj3/U6tWrb9T4GlVmZqbKy8vN5eTJk009JAAAcJPz68rSlClTNH36dI0YMUKS1Lt3b3344YeaM2eORo8ercjISElSaWmpOnXqZG5XWlqquLg4SVJkZKROnz7t0+/ly5d19uxZc/vIyEiVlpb61NR+/rKa2vb62Gw22Ww2fw4ZAADc4vy6svR///d/Cgz03SQoKEg1NTWSpJiYGEVGRqqgoMBs93q92r17t5xOpyTJ6XSqrKxMhYWFZs2WLVtUU1OjhIQEs2b79u26dOmSWbN582bdddddatu2rVlz5X5qa2r3AwAA0Bj8Ckvf//739fvf/155eXk6ceKEXnvtNS1cuFA/+tGPJEkBAQGaNGmSfve73+mNN97QoUOH9OijjyoqKkrDhg2TJPXs2VMPPvigxo4dqz179mjHjh1KT0/XiBEjFBUVJUn66U9/quDgYKWmpqqoqEhr167V4sWLlZGRYY7lySefVH5+vhYsWKDi4mLNmjVL+/btU3p6eiNNDQAAgJ9/hlu6dKl+/etf6xe/+IVOnz6tqKgo/fznP1dWVpZZM3XqVFVUVGjcuHEqKyvTvffeq/z8fIWEhJg1r7zyitLT0/XAAw8oMDBQycnJWrJkidnucDj09ttvKy0tTfHx8erQoYOysrJ83sU0aNAgrVmzRjNmzNCvfvUr3XnnndqwYYN69ep1PfMBAADgw6/3LH3d8J4lX7xnCQDQHNzU71kCAAC41RCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALPj1niUAANA0eCVN0+HKEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAW/wlK3bt0UEBBQZ0lLS5MkXbx4UWlpaWrfvr3atGmj5ORklZaW+vRRUlKipKQktWrVSuHh4ZoyZYouX77sU7N161b169dPNptN3bt3V05OTp2xLF++XN26dVNISIgSEhK0Z88ePw8dAADgy/kVlvbu3auPP/7YXDZv3ixJ+vGPfyxJmjx5st58802tX79e27Zt06lTp/Twww+b21dXVyspKUlVVVXauXOnVq9erZycHGVlZZk1x48fV1JSkgYPHqyDBw9q0qRJeuKJJ7Rp0yazZu3atcrIyNDMmTO1f/9+9enTRy6XS6dPn76uyQAAAPiiAMMwjGvdeNKkScrNzdX7778vr9erjh07as2aNXrkkUckScXFxerZs6fcbrcGDhyot956S
w899JBOnTqliIgISVJ2dramTZumM2fOKDg4WNOmTVNeXp4OHz5s7mfEiBEqKytTfn6+JCkhIUH33HOPli1bJkmqqalRdHS0Jk6cqOnTpzd4/F6vVw6HQ+Xl5bLb7dc6DfXqNj2vUfv7KpyYm9TUQwAAXAW/Vz53I39/1+ea71mqqqrSyy+/rMcff1wBAQEqLCzUpUuXlJiYaNb06NFDXbp0kdvtliS53W717t3bDEqS5HK55PV6VVRUZNZc2UdtTW0fVVVVKiws9KkJDAxUYmKiWXM1lZWV8nq9PgsAAICVaw5LGzZsUFlZmR577DFJksfjUXBwsMLCwnzqIiIi5PF4zJorg1Jte22bVY3X69WFCxf0ySefqLq6ut6a2j6uZs6cOXI4HOYSHR3t1zEDAIBbzzWHpRdffFFDhw5VVFRUY47nhsrMzFR5ebm5nDx5sqmHBAAAbnItrmWjDz/8UH/729/0P//zP+a6yMhIVVVVqayszOfqUmlpqSIjI82aLz61Vvu03JU1X3yCrrS0VHa7XaGhoQoKClJQUFC9NbV9XI3NZpPNZvPvYAEAwC3tmq4srVq1SuHh4UpK+vzGrfj4eLVs2VIFBQXmumPHjqmkpEROp1OS5HQ6dejQIZ+n1jZv3iy73a7Y2Fiz5so+amtq+wgODlZ8fLxPTU1NjQoKCswaAACAxuL3laWamhqtWrVKo0ePVosWn2/ucDiUmpqqjIwMtWvXTna7XRMnTpTT6dTAgQMlSUOGDFFsbKxGjRqlefPmyePxaMaMGUpLSzOv+IwfP17Lli3T1KlT9fjjj2vLli1at26d8vI+fwogIyNDo0ePVv/+/TVgwAA999xzqqio0JgxY653PgAAAHz4HZb+9re/qaSkRI8//nidtkWLFikwMFDJycmqrKyUy+XSihUrzPagoCDl5uZqwoQJcjqdat26tUaPHq3Zs2ebNTExMcrLy9PkyZO1ePFide7cWS+88IJcLpdZM3z4cJ05c0ZZWVnyeDyKi4tTfn5+nZu+AQAArtd1vWepueM9S754zxIA3Lz4vfK5ZvOeJQAAgFsBYQkAAMDCNb06AMCthz8BALhVcWUJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAgt9h6T//+Y9+9rOfqX379goNDVXv3r21b98+s90wDGVlZalTp04KDQ1VYmKi3n//fZ8+zp49q5SUFNntdoWFhSk1NVXnz5/3qXnvvfd03333KSQkRNHR0Zo3b16dsaxfv149evRQSEiIevfurY0bN/p7OAAAAJb8Ckuffvqpvv3tb6tly5Z66623dOTIES1YsEBt27Y1a+bNm6clS5YoOztbu3fvVuvWreVyuXTx4kWzJiUlRUVFRdq8ebNyc3O1fft2jRs3zmz3er0aMmSIunbtqsLCQs2fP1+zZs3S888/b9bs3LlTI0eOVGpqqg4cOKBhw4Zp2LBhOnz48PXMBwAAgI8AwzCMhhZPnz5dO3bs0N///vd62w3DUFRUlJ566ik9/fTTkqTy8nJFREQoJydHI0aM0NGjRxUbG6u9e/eqf//+kqT8/Hx973vf00cffaSoqCitXLlSzzzzjDwej4KDg819b9iwQcXFxZKk4cOHq6KiQrm5ueb+Bw4cqLi4OGVnZzfoeLxerxwOh8rLy2W32xs6DQ3SbXpeo/b3VTgxN6mph4CbGOc00LT4Gfzcjfz9XR+/riy98cYb6t+/v3784x8rPDxcffv21Z///Gez/fjx4/J4PEpMTDTXORwOJSQkyO12S5LcbrfCwsLMoCRJiYmJCgwM1O7du82a+
++/3wxKkuRyuXTs2DF9+umnZs2V+6mtqd1PfSorK+X1en0WAAAAK36FpQ8++EArV67UnXfeqU2bNmnChAn65S9/qdWrV0uSPB6PJCkiIsJnu4iICLPN4/EoPDzcp71FixZq166dT019fVy5j6vV1LbXZ86cOXI4HOYSHR3tz+EDAIBbkF9hqaamRv369dOzzz6rvn37aty4cRo7dmyD/+zV1DIzM1VeXm4uJ0+ebOohAQCAm5xfYalTp06KjY31WdezZ0+VlJRIkiIjIyVJpaWlPjWlpaVmW2RkpE6fPu3TfvnyZZ09e9anpr4+rtzH1Wpq2+tjs9lkt9t9FgAAACt+haVvf/vbOnbsmM+6f/7zn+rataskKSYmRpGRkSooKDDbvV6vdu/eLafTKUlyOp0qKytTYWGhWbNlyxbV1NQoISHBrNm+fbsuXbpk1mzevFl33XWX+eSd0+n02U9tTe1+AAAAGoNfYWny5MnatWuXnn32Wf3rX//SmjVr9PzzzystLU2SFBAQoEmTJul3v/ud3njjDR06dEiPPvqooqKiNGzYMEmfXYl68MEHNXbsWO3Zs0c7duxQenq6RowYoaioKEnST3/6UwUHBys1NVVFRUVau3atFi9erIyMDHMsTz75pPLz87VgwQIVFxdr1qxZ2rdvn9LT0xtpagAAAKQW/hTfc889eu2115SZmanZs2crJiZGzz33nFJSUsyaqVOnqqKiQuPGjVNZWZnuvfde5efnKyQkxKx55ZVXlJ6ergceeECBgYFKTk7WkiVLzHaHw6G3335baWlpio+PV4cOHZSVleXzLqZBgwZpzZo1mjFjhn71q1/pzjvv1IYNG9SrV6/rmQ8AAAAffr1n6euG9yz54p00sMI5DTQtfgY/d1O/ZwkAAOBWQ1gCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACw4FdYmjVrlgICAnyWHj16mO0XL15UWlqa2rdvrzZt2ig5OVmlpaU+fZSUlCgpKUmtWrVSeHi4pkyZosuXL/vUbN26Vf369ZPNZlP37t2Vk5NTZyzLly9Xt27dFBISooSEBO3Zs8efQwEAAGgQv68sfetb39LHH39sLv/4xz/MtsmTJ+vNN9/U+vXrtW3bNp06dUoPP/yw2V5dXa2kpCRVVVVp586dWr16tXJycpSVlWXWHD9+XElJSRo8eLAOHjyoSZMm6YknntCmTZvMmrVr1yojI0MzZ87U/v371adPH7lcLp0+ffpa5wEAAKBefoelFi1aKDIy0lw6dOggSSovL9eLL76ohQsX6rvf/a7i4+O1atUq7dy5U7t27ZIkvf322zpy5IhefvllxcXFaejQofrtb3+r5cuXq6qqSpKUnZ2tmJgYLViwQD179lR6eroeeeQRLVq0yBzDwoULNXbsWI0ZM0axsbHKzs5Wq1at9NJLLzXGnAAAAJj8Dkvvv/++oqKidMcddyglJUUlJSWSpMLCQl26dEmJiYlmbY8ePdSlSxe53W5JktvtVu/evRUREWHWuFwueb1eFRUVmTVX9lFbU9tHVVWVCgsLfWoCAwOVmJho1lxNZWWlvF6vzwIAAGDFr7CUkJCgnJwc5efna+XKlTp+/Ljuu+8+nTt3Th6PR8HBwQoLC/PZJiIiQh6PR5Lk8Xh8glJte22bVY3X69WFCxf0ySefqLq6ut6a2j6uZs6cOXI4HOYSHR3tz+EDAIBbUAt/iocOHWr+97vvvlsJCQnq2rWr1q1bp9DQ0EYfXGPLzMxURkaG+dnr9RKYAACApet6dUBYW
Ji++c1v6l//+pciIyNVVVWlsrIyn5rS0lJFRkZKkiIjI+s8HVf7+ctq7Ha7QkND1aFDBwUFBdVbU9vH1dhsNtntdp8FAADAynWFpfPnz+vf//63OnXqpPj4eLVs2VIFBQVm+7Fjx1RSUiKn0ylJcjqdOnTokM9Ta5s3b5bdbldsbKxZc2UftTW1fQQHBys+Pt6npqamRgUFBWYNAABAY/ErLD399NPatm2bTpw4oZ07d+pHP/qRgoKCNHLkSDkcDqWmpiojI0PvvPOOCgsLNWbMGDmdTg0cOFCSNGTIEMXGxmrUqFF69913tWnTJs2YMUNpaWmy2WySpPHjx+uDDz7Q1KlTVVxcrBUrVmjdunWaPHmyOY6MjAz9+c9/1urVq3X06FFNmDBBFRUVGjNmTCNODQAAgJ/3LH300UcaOXKk/vd//1cdO3bUvffeq127dqljx46SpEWLFikwMFDJycmqrKyUy+XSihUrzO2DgoKUm5urCRMmyOl0qnXr1ho9erRmz55t1sTExCgvL0+TJ0/W4sWL1blzZ73wwgtyuVxmzfDhw3XmzBllZWXJ4/EoLi5O+fn5dW76BgAAuF4BhmEYTT2IpuL1euVwOFReXt7o9y91m57XqP19FU7MTWrqIeAmxjkNNC1+Bj93I39/14fvhgMAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBwXWFp7ty5CggI0KRJk8x1Fy9eVFpamtq3b682bdooOTlZpaWlPtuVlJQoKSlJrVq1Unh4uKZMmaLLly/71GzdulX9+vWTzWZT9+7dlZOTU2f/y5cvV7du3RQSEqKEhATt2bPneg4HAACgjmsOS3v37tWf/vQn3X333T7rJ0+erDfffFPr16/Xtm3bdOrUKT388MNme3V1tZKSklRVVaWdO3dq9erVysnJUVZWlllz/PhxJSUlafDgwTp48KAmTZqkJ554Qps2bTJr1q5dq4yMDM2cOVP79+9Xnz595HK5dPr06Ws9JAAAgDquKSydP39eKSkp+vOf/6y2bdua68vLy/Xiiy9q4cKF+u53v6v4+HitWrVKO3fu1K5duyRJb7/9to4cOaKXX35ZcXFxGjp0qH77299q+fLlqqqqkiRlZ2crJiZGCxYsUM+ePZWenq5HHnlEixYtMve1cOFCjR07VmPGjFFsbKyys7PVqlUrvfTSS9czHwAAAD6uKSylpaUpKSlJiYmJPusLCwt16dIln/U9evRQly5d5Ha7JUlut1u9e/dWRESEWeNyueT1elVUVGTWfLFvl8tl9lFVVaXCwkKfmsDAQCUmJpo19amsrJTX6/VZAAAArLTwd4NXX31V+/fv1969e+u0eTweBQcHKywszGd9RESEPB6PWXNlUKptr22zqvF6vbpw4YI+/fRTVVdX11tTXFx81bHPmTNHv/nNbxp2oAAAAPLzytLJkyf15JNP6pVXXlFISMiNGtMNk5mZqfLycnM5efJkUw8JAADc5PwKS4WFhTp9+rT69eunFi1aqEWLFtq2bZuWLFmiFi1aKCIiQlVVVSorK/PZrrS0VJGRkZKkyMjIOk/H1X7+shq73a7Q0FB16NBBQUFB9dbU9lEfm80mu93uswAAAFjxKyw98MADOnTokA4ePGgu/fv3V0pKivnfW7ZsqYKCAnObY8eOqaSkRE6nU5LkdDp16NAhn6fWNm/eLLvdrtjYWLPmyj5qa2r7CA4OVnx8vE9NTU2NCgoKzBoAAIDG4Nc9S7fddpt69erls65169Zq3769uT41NVUZG
Rlq166d7Ha7Jk6cKKfTqYEDB0qShgwZotjYWI0aNUrz5s2Tx+PRjBkzlJaWJpvNJkkaP368li1bpqlTp+rxxx/Xli1btG7dOuXl5Zn7zcjI0OjRo9W/f38NGDBAzz33nCoqKjRmzJjrmhAAAIAr+X2D95dZtGiRAgMDlZycrMrKSrlcLq1YscJsDwoKUm5uriZMmCCn06nWrVtr9OjRmj17tlkTExOjvLw8TZ48WYsXL1bnzp31wgsvyOVymTXDhw/XmTNnlJWVJY/Ho7i4OOXn59e56RsAAOB6BBiGYTT1IJqK1+uVw+FQeXl5o9+/1G163pcX3WROzE1q6iHgJsY5DTQtfgY/dyN/f9eH74YDAACwQFgCAACwQFgCAACwQFgCAACw0OhPwwHAzYIbYgE0Bq4sAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWPArLK1cuVJ333237Ha77Ha7nE6n3nrrLbP94sWLSktLU/v27dWmTRslJyertLTUp4+SkhIlJSWpVatWCg8P15QpU3T58mWfmq1bt6pfv36y2Wzq3r27cnJy6oxl+fLl6tatm0JCQpSQkKA9e/b4cygAAAAN4ldY6ty5s+bOnavCwkLt27dP3/3ud/XDH/5QRUVFkqTJkyfrzTff1Pr167Vt2zadOnVKDz/8sLl9dXW1kpKSVFVVpZ07d2r16tXKyclRVlaWWXP8+HElJSVp8ODBOnjwoCZNmqQnnnhCmzZtMmvWrl2rjIwMzZw5U/v371efPn3kcrl0+vTp650PAAAAHwGGYRjX00G7du00f/58PfLII+rYsaPWrFmjRx55RJJUXFysnj17yu12a+DAgXrrrbf00EMP6dSpU4qIiJAkZWdna9q0aTpz5oyCg4M1bdo05eXl6fDhw+Y+RowYobKyMuXn50uSEhISdM8992jZsmWSpJqaGkVHR2vixImaPn16g8fu9XrlcDhUXl4uu91+PdNQR7fpeY3a31fhxNykph4CbmLN8Zxujvg5xNU0x5/BG3U+38jf3/W55nuWqqur9eqrr6qiokJOp1OFhYW6dOmSEhMTzZoePXqoS5cucrvdkiS3263evXubQUmSXC6XvF6veXXK7Xb79FFbU9tHVVWVCgsLfWoCAwOVmJho1lxNZWWlvF6vzwIAAGDF77B06NAhtWnTRjabTePHj9drr72m2NhYeTweBQcHKywszKc+IiJCHo9HkuTxeHyCUm17bZtVjdfr1YULF/TJJ5+ourq63praPq5mzpw5cjgc5hIdHe3v4QMAgFuM32Hprrvu0sGDB7V7925NmDBBo0eP1pEjR27E2BpdZmamysvLzeXkyZNNPSQAAHCTa+HvBsHBwerevbskKT4+Xnv37tXixYs1fPhwVVVVqayszOfqUmlpqSIjIyVJkZGRdZ5aq31a7sqaLz5BV1paKrvdrtDQUAUFBSkoKKjemto+rsZms8lms/l7yAAA4BZ23e9ZqqmpUWVlpeLj49WyZUsVFBSYbceOHVNJSYmcTqckyel06tChQz5PrW3evFl2u12xsbFmzZV91NbU9hEcHKz4+HifmpqaGhUUFJg1AAAAjcWvK0uZmZkaOnSounTponPnzmnNmjXaunWrNm3aJIfDodTUVGVkZKhdu3ay2+2aOHGinE6nBg4cKEkaMmSIYmNjNWrUKM2bN08ej0czZsxQWlqaecVn/PjxWrZsmaZOnarHH39cW7Zs0bp165SX9/lTABkZGRo9erT69++vAQMG6LnnnlNFRYXGjBnTiFMDAADgZ1g6ffq0Hn30UX388cdyOBy6++67tWnTJv3Xf/2XJ
GnRokUKDAxUcnKyKisr5XK5tGLFCnP7oKAg5ebmasKECXI6nWrdurVGjx6t2bNnmzUxMTHKy8vT5MmTtXjxYnXu3FkvvPCCXC6XWTN8+HCdOXNGWVlZ8ng8iouLU35+fp2bvgEAAK7Xdb9nqTnjPUu+eL8LrDTHc7o54ucQV9Mcfwa/Lu9Z8vsGbwAAmrvmGDzQdPgiXQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAu8OgDNWnN8/Jf36ABA88KVJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAstmnoAAIDPdZue19RD8NuJuUlNPQTghuLKEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAW/wtKcOXN0zz336LbbblN4eLiGDRumY8eO+dRcvHhRaWlpat++vdq0aaPk5GSVlpb61JSUlCgpKUmtWrVSeHi4pkyZosuXL/vUbN26Vf369ZPNZlP37t2Vk5NTZzzLly9Xt27dFBISooSEBO3Zs8efwwEAAPhSfoWlbdu2KS0tTbt27dLmzZt16dIlDRkyRBUVFWbN5MmT9eabb2r9+vXatm2bTp06pYcffthsr66uVlJSkqqqqrRz506tXr1aOTk5ysrKMmuOHz+upKQkDR48WAcPHtSkSZP0xBNPaNOmTWbN2rVrlZGRoZkzZ2r//v3q06ePXC6XTp8+fT3zAQAA4CPAMAzjWjc+c+aMwsPDtW3bNt1///0qLy9Xx44dtWbNGj3yyCOSpOLiYvXs2VNut1sDBw7UW2+9pYceekinTp1SRESEJCk7O1vTpk3TmTNnFBwcrGnTpikvL0+HDx829zVixAiVlZUpPz9fkpSQkKB77rlHy5YtkyTV1NQoOjpaEydO1PTp0xs0fq/XK4fDofLyctnt9mudhnrx/U5fDeb5q9Mc5xpfjeZ4TnM+fzVu1LlxI39/1+e6vki3vLxcktSuXTtJUmFhoS5duqTExESzpkePHurSpYsZltxut3r37m0GJUlyuVyaMGGCioqK1LdvX7ndbp8+amsmTZokSaqqqlJhYaEyMzPN9sDAQCUmJsrtdl91vJWVlaqsrDQ/e73eaz944BrxjzQANC/XfIN3TU2NJk2apG9/+9vq1auXJMnj8Sg4OFhhYWE+tREREfJ4PGbNlUGptr22zarG6/XqwoUL+uSTT1RdXV1vTW0f9ZkzZ44cDoe5REdH+3/gAADglnLNYSktLU2HDx/Wq6++2pjjuaEyMzNVXl5uLidPnmzqIQEAgJvcNf0ZLj09Xbm5udq+fbs6d+5sro+MjFRVVZXKysp8ri6VlpYqMjLSrPniU2u1T8tdWfPFJ+hKS0tlt9sVGhqqoKAgBQUF1VtT20d9bDabbDab/wcMAABuWX5dWTIMQ+np6Xrttde0ZcsWxcTE+LTHx8erZcuWKigoMNcdO3ZMJSUlcjqdkiSn06lDhw75PLW2efNm2e12xcbGmjVX9lFbU9tHcHCw4uPjfWpqampUUFBg1gAAADQGv64spaWlac2aNXr99dd12223mfcHORwOhYaGyuFwKDU1VRkZGWrXrp3sdrsmTpwop9OpgQMHSpKGDBmi2NhYjRo1SvPmzZPH49GMGTOUlpZmXvUZP368li1bpqlTp+rxxx/Xli1btG7dOuXlfX5jbEZGhkaPHq3+/ftrwIABeu6551RRUaExY8Y01twAAAD4F5ZWrlwpSfrOd77js37VqlV67LHHJEmLFi1SYGCgkpOTVVlZKZfLpRUrVpi1QUFBys3N1YQJE+R0OtW6dWuNHj1as2fPNmtiYmKUl5enyZMna/HixercubNeeOEFuVwus2b48OE6c+aMsrKy5PF4FBcXp/z8/Do3fQMAAFyP63rPU
nPHe5Z88a4UANeCfztwNV+X9yzx3XAAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAW/PoiXXy98V1JAADURVgCAFwX/o8Wvu74MxwAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFv8PS9u3b9f3vf19RUVEKCAjQhg0bfNoNw1BWVpY6deqk0NBQJSYm6v333/epOXv2rFJSUmS32xUWFqbU1FSdP3/ep+a9997Tfffdp5CQEEVHR2vevHl1xrJ+/Xr16NFDISEh6t27tzZu3Ojv4QAAAFjyOyxVVFSoT58+Wr58eb3t8+bN05IlS5Sdna3du3erdevWcrlcunjxolmTkpKioqIibd68Wbm5udq+fbvGjRtntnu9Xg0ZMkRdu3ZVYWGh5s+fr1mzZun55583a3bu3KmRI0cqNTVVBw4c0LBhwzRs2DAdPnzY30MCAAC4qgDDMIxr3jggQK+99pqGDRsm6bOrSlFRUXrqqaf09NNPS5LKy8sVERGhnJwcjRgxQkePHlVsbKz27t2r/v37S5Ly8/P1ve99Tx999JGioqK0cuVKPfPMM/J4PAoODpYkTZ8+XRs2bFBxcbEkafjw4aqoqFBubq45noEDByouLk7Z2dkNGr/X65XD4VB5ebnsdvu1TkO9uk3Pa9T+AABobk7MTboh/d7I39/1adR7lo4fPy6Px6PExERzncPhUEJCgtxutyTJ7XYrLCzMDEqSlJiYqMDAQO3evdusuf/++82gJEkul0vHjh3Tp59+atZcuZ/amtr9AAAANIYWjdmZx+ORJEVERPisj4iIMNs8Ho/Cw8N9B9Gihdq1a+dTExMTU6eP2ra2bdvK4/FY7qc+lZWVqqysND97vV5/Dg8AANyCbqmn4ebMmSOHw2Eu0dHRTT0kAABwk2vUsBQZGSlJKi0t9VlfWlpqtkVGRur06dM+7ZcvX9bZs2d9aurr48p9XK2mtr0+mZmZKi8vN5eTJ0/6e4gAAOAW06hhKSYmRpGRkSooKDDXeb1e7d69W06nU5LkdDpVVlamwsJCs2bLli2qqalRQkKCWbN9+3ZdunTJrNm8ebPuuusutW3b1qy5cj+1NbX7qY/NZpPdbvdZAAAArPgdls6fP6+DBw/q4MGDkj67qfvgwYMqKSlRQECAJk2apN/97nd64403dOjQIT366KOKiooyn5jr2bOnHnzwQY0dO1Z79uzRjh07lJ6erhEjRigqKkqS9NOf/lTBwcFKTU1VUVGR1q5dq8WLFysjI8Mcx5NPPqn8/HwtWLBAxcXFmjVrlvbt26f09PTrnxUAAID/z+8bvPft26fBgwebn2sDzOjRo5WTk6OpU6eqoqJC48aNU1lZme69917l5+crJCTE3OaVV15Renq6HnjgAQUGBio5OVlLliwx2x0Oh95++22lpaUpPj5eHTp0UFZWls+7mAYNGqQ1a9ZoxowZ+tWvfqU777xTGzZsUK9eva5pIgAAAOpzXe9Zau54zxIAADcO71kCAAC4BRCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALDT7sLR8+XJ169ZNISEhSkhI0J49e5p6SAAA4GukWYeltWvXKiMjQzNnztT+/fvVp08fuVwunT59uqmHBgAAv
iaadVhauHChxo4dqzFjxig2NlbZ2dlq1aqVXnrppaYeGgAA+Jpo0dQDuFZVVVUqLCxUZmamuS4wMFCJiYlyu931blNZWanKykrzc3l5uSTJ6/U2+vhqKv+v0fsEAKA5uRG/X6/s1zCMG9L/FzXbsPTJJ5+ourpaERERPusjIiJUXFxc7zZz5szRb37zmzrro6Ojb8gYAQC4lTmeu7H9nzt3Tg6H48buRM04LF2LzMxMZWRkmJ9ramp09uxZtW/fXgEBAU04svp5vV5FR0fr5MmTstvtTT2cmxbz1DDMU8MxVw3DPDUM89RwDZ0rwzB07tw5RUVFfSXjarZhqUOHDgoKClJpaanP+tLSUkVGRta7jc1mk81m81kXFhZ2o4bYaOx2Oz9gDcA8NQzz1HDMVcMwTw3DPDVcQ+bqq7iiVKvZ3uAdHBys+Ph4FRQUmOtqampUUFAgp9PZhCMDAABfJ832ypIkZWRkaPTo0erfv78GDBig5557ThUVFRozZkxTDw0AAHxNNOuwNHz4cJ05c0ZZWVnyeDyKi4tTfn5+nZu+myubzaaZM2fW+dMhfDFPDcM8NRxz1TDMU8MwTw13s85VgPFVPXcHAADQDDXbe5YAAAC+CoQlAAAAC4QlAAAAC4QlAAAAC4Slr0heXp4SEhIUGhqqtm3batiwYT7tJSUlSkpKUqtWrRQeHq4pU6bo8uXLPjVbt25Vv379ZLPZ1L17d+Xk5NTZz/Lly9WtWzeFhIQoISFBe/bs8Wm/ePGi0tLS1L59e7Vp00bJycl1XuzZVLp166aAgACfZe7cuWb7iRMn6rQHBARo165dPv2sX79ePXr0UEhIiHr37q2NGzf6tBuGoaysLHXq1EmhoaFKTEzU+++/71Nz9uxZpaSkyG63KywsTKmpqTp//vyNO3g/fNk8SdJ7772n++67TyEhIYqOjta8efPq9PN1n6crVVZWKi4uTgEBATp48KC5nnPK19XmSeKckqQf/OAH6tKli0JCQtSpUyeNGjVKp06dMts5nz7zZfMkNcPzycAN99e//tVo27atsXLlSuPYsWNGUVGRsXbtWrP98uXLRq9evYzExETjwIEDxsaNG40OHToYmZmZZs0HH3xgtGrVysjIyDCOHDliLF261AgKCjLy8/PNmldffdUIDg42XnrpJaOoqMgYO3asERYWZpSWlpo148ePN6Kjo42CggJj3759xsCBA41BgwZ9NRPxJbp27WrMnj3b+Pjjj83l/PnzZvvx48cNScbf/vY3n5qqqiqzZseOHUZQUJAxb94848iRI8aMGTOMli1bGocOHTJr5s6dazgcDmPDhg3Gu+++a/zgBz8wYmJijAsXLpg1Dz74oNGnTx9j165dxt///neje/fuxsiRI7+aifgSXzZP5eXlRkREhJGSkmIcPnzY+Mtf/mKEhoYaf/rTn8yaW2GervTLX/7SGDp0qCHJOHDggLmec8rX1eaJc+ozCxcuNNxut3HixAljx44dhtPpNJxOp9nO+fSZL5un5ng+EZZusEuXLhm333678cILL1y1ZuPGjUZgYKDh8XjMdStXrjTsdrtRWVlpGIZhTJ061fjWt77ls93w4cMNl8tlfh4wYICRlpZmfq6urjaioqKMOXPmGIZhGGVlZUbLli2N9evXmzVHjx41JBlut/v6DrQRdO3a1Vi0aNFV22v/IbryH/Ev+slPfmIkJSX5rEtISDB+/vOfG4ZhGDU1NUZkZKQxf/58s72srMyw2WzGX/7yF8MwDOPIkSOGJGPv3r1mzVtvvWUEBAQY//nPf67hyBrXl83TihUrjLZt25rnjmEYxrRp04y77rrL/HwrzFOtjRs3Gj169DCKioquGpZu9XPKMKzniXOqfq+//roREBBghiHOp/p9cZ6a4/nEn+FusP379+s///mPAgMD1bdvX3Xq1ElDhw7V4cOHzRq3263evXv7vEzT5XLJ6/WqqKjIrElMTPTp2+Vyye12S5KqqqpUW
FjoUxMYGKjExESzprCwUJcuXfKp6dGjh7p06WLWNLW5c+eqffv26tu3r+bPn1/nT5HSZ5d4w8PDde+99+qNN97wafuyeTp+/Lg8Ho9PjcPhUEJCglnjdrsVFham/v37mzWJiYkKDAzU7t27G+1Yr4fVPLndbt1///0KDg4217lcLh07dkyffvqpWXMrzFNpaanGjh2r//7v/1arVq2uWnern1NfNk+cU3WdPXtWr7zyigYNGqSWLVv6tN3q59OV6pun5ng+EZZusA8++ECSNGvWLM2YMUO5ublq27atvvOd7+js2bOSJI/HU+et47WfPR6PZY3X69WFCxf0ySefqLq6ut6aK/sIDg6u8+XBV9Y0pV/+8pd69dVX9c477+jnP/+5nn32WU2dOtVsb9OmjRYsWKD169crLy9P9957r4YNG+bzj9HV5unKOahdZ1UTHh7u096iRQu1a9euWczT9ZxPX6d5MgxDjz32mMaPH+/zj+WVOKcaNk+cU5+bNm2aWrdurfbt26ukpESvv/662cb59DmreWqO5xNh6RpNnz693hv5rlyKi4tVU1MjSXrmmWeUnJys+Ph4rVq1SgEBAVq/fn0TH8WN19B5kj77rr/vfOc7uvvuuzV+/HgtWLBAS5cuVWVlpSSpQ4cOysjIUEJCgu655x7NnTtXP/vZzzR//vymPMRG0Zjz9HXX0LlaunSpzp07p8zMzKv2xTnVsHn6OvPnZ0+SpkyZogMHDujtt99WUFCQHn30URn//4swOJ8aNk/NUbP+brim9NRTT+mxxx6zrLnjjjv08ccfS5JiY2PN9TabTXfccYdKSkokSZGRkXWeWqt9Qi0yMtL8zy8+tVZaWiq73a7Q0FAFBQUpKCio3por+6iqqlJZWZnP1aUraxpbQ+epPgkJCbp8+bJOnDihu+6666o1mzdvNj9fbZ6unIPadZ06dfKpiYuLM2tOnz7t08fly5d19uzZZjFPV5sD6cvPp5t9nqSGz9WWLVvkdrvrfMdU//79lZKSotWrV9e77a12TjVknr7O55S/P3sdOnRQhw4d9M1vflM9e/ZUdHS0du3aJafTWe+2t9r5VMtqnprl+eTXHU7wW3l5uWGz2Xxu8K6qqjLCw8PNO/9rb/C+8qm1P/3pT4bdbjcuXrxoGMZnN3j36tXLp++RI0fWucE7PT3d/FxdXW3cfvvtdW7w/utf/2rWFBcX3zQ3eH/Ryy+/bAQGBhpnz569as0TTzxh9O3b1/z8k5/8xHjooYd8apxOZ52bAv/4xz+a7bX/G33xpsB9+/aZNZs2bbppb5784jzV3jx55RM4mZmZdW6e/LrP04cffmgcOnTIXDZt2mRIMv76178aJ0+evOp2t9o51ZB54pyq34cffmhIMt55552r1txq51N9vjhPzfF8Iix9BZ588knj9ttvNzZt2mQUFxcbqampRnh4uPnLrfbVAUOGDDEOHjxo5OfnGx07dqz31QFTpkwxjh49aixfvrzeVwfYbDYjJyfHOHLkiDFu3DgjLCzM5ym78ePHG126dDG2bNli7Nu3r84jnU1l586dxqJFi4yDBw8a//73v42XX37Z6Nixo/Hoo4+aNTk5OcaaNWuMo0ePGkePHjV+//vfG4GBgcZLL71k1uzYscNo0aKF8cc//tE4evSoMXPmzHofNw0LCzNef/1147333jN++MMf1vu4ad++fY3du3cb//jHP4w777zzpngstyHzVFZWZkRERBijRo0yDh8+bLz66qtGq1at6jyW+3Wep/rU96QS51Rd9c0T55Rh7Nq1y1i6dKlx4MAB48SJE0ZBQYExaNAg4xvf+Ib5f2o5nxo2T83xfCIsfQWqqqqMp556yggPDzduu+02IzEx0Th8+LBPzYkTJ4yhQ4caoaGhRocOHYynnnrKuHTpkk/NO++8Y8TFxRnBwcHGHXfcYaxatarOvpYuXWp06dLFCA4ONgYMGGDs2rXLp/3ChQvGL37xC6Nt27ZGq
1atjB/96EfGxx9/3OjH7K/CwkIjISHBcDgcRkhIiNGzZ0/j2WefNX+4DOOzf4h69uxptGrVyrDb7caAAQN8XoNQa926dcY3v/lNIzg42PjWt75l5OXl+bTX1NQYv/71r42IiAjDZrMZDzzwgHHs2DGfmv/93/81Ro4cabRp08aw2+3GmDFjjHPnzt2Yg/dDQ+bJMAzj3XffNe69917DZrMZt99+uzF37tw6fX2d56k+VwtLt/o59UVXe/z9Vj+n3nvvPWPw4MFGu3btDJvNZnTr1s0YP3688dFHH5k1nE8NmyfDaH7nU4BhNOM7rgAAAG4wnoYDAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACw8P8AiF7xM8gGAN8AAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -555,15 +835,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'range': {'n_atoms': {'lte': '300', 'gte': '200'}}}\n", - "CPU times: user 2.6 ms, sys: 0 ns, total: 2.6 ms\n", - "Wall time: 4.99 ms\n" + "CPU times: user 2.09 ms, sys: 0 ns, total: 2.09 ms\n", + "Wall time: 5.36 ms\n" ] }, { "data": { "text/plain": [ - "84768" + "169536" ] }, "execution_count": 15, @@ -586,14 +865,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 665 µs, sys: 403 µs, total: 1.07 ms\n", - "Wall time: 195 ms\n" + "CPU times: user 2.8 ms, sys: 0 ns, total: 2.8 ms\n", + "Wall time: 396 ms\n" ] }, { "data": { "text/plain": [ - "84768" + "169536" ] }, "execution_count": 16, @@ -618,15 +897,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'bool': {'must': [{'match': {'formula': {'query': 'C48H28O32Zr6', 'zero_terms_query': 'all'}}}, {'match': {'username': {'query': 'ubuntu', 'zero_terms_query': 'all'}}}]}}\n", - "CPU times: user 1.67 ms, sys: 0 ns, total: 1.67 ms\n", - "Wall time: 4.74 ms\n" + "CPU times: user 3.08 ms, sys: 0 ns, total: 3.08 ms\n", + "Wall time: 6.34 ms\n" ] }, { "data": { "text/plain": [ - "30336" + "60672" ] }, "execution_count": 17, @@ -649,14 +927,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 507 µs, sys: 307 µs, total: 814 µs\n", - "Wall time: 192 ms\n" + "CPU times: user 816 µs, sys: 3.33 ms, total: 4.14 ms\n", + "Wall time: 409 ms\n" ] }, { "data": { "text/plain": [ - "30336" + "60672" ] }, "execution_count": 18, @@ -683,15 +961,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'term': {'pbc': {'value': 'true'}}}\n", - "CPU times: user 1.61 ms, sys: 0 ns, total: 1.61 ms\n", - "Wall time: 4.05 ms\n" + "CPU times: user 4.41 ms, sys: 69 µs, total: 4.48 ms\n", + "Wall time: 7.81 ms\n" ] }, { "data": { "text/plain": [ - "197280" + "394560" ] }, "execution_count": 19, @@ -714,14 +991,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.12 ms, 
sys: 677 µs, total: 1.8 ms\n", - "Wall time: 206 ms\n" + "CPU times: user 5.05 ms, sys: 246 µs, total: 5.3 ms\n", + "Wall time: 425 ms\n" ] }, { "data": { "text/plain": [ - "197280" + "394560" ] }, "execution_count": 20, @@ -746,11 +1023,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'range': {'modified': {'lte': '2023-09-06T12:30:32.0000001'}}}\n", "152\n", - "{'_id': 'qcR-aooBr-e2h_f6LzMC', 'n_atoms': 114, 'cell': [[14.759483662029265, 0.0, 0.0], [7.380258413807584, 12.781786651387147, 0.0], [7.380243655055182, 4.260782501715179, 12.050631347394049]], 'pbc': [True, True, True], 'formula': 'C48H28O32Zr6', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 40, 40, 40, 40, 40, 40], 'positions': [[17.166810638040264, 11.566799628342661, 2.3959431306453296], [10.391931260040497, 9.232075241735581, 8.799170748954813], [15.152442318761134, 3.2144705981189303, 0.6236271192356346], [15.428455018627362, 13.198368239182761, 6.757442369774353], [20.968952462595865, 8.354501228588285, 5.937790321351722], [12.821718368988067, 11.860905590260213, 0.764468940894911], [20.164574198879585, 13.449131931085539, 8.500504258460039], [5.203325638335655, 4.037525599970674, 3.6535544413570706], [6.476452578322519, 9.882112891744764, 0.7336632917566172], [14.332783438660714, 4.5739237510789845, 5.763830060388294], [12.20845295758527, 7.975607890442319, 2.7181563401019804], [26.126453831046035, 15.25865575215541, 8.681035572143871], [7.431639790543854, 7.68880010777489, 3.739705967641281], [21.52510600020679, 15.432405681052952, 8.675468268048236], [11.49107468172553, 9.60164215963523, 0.7009214784567679], [18.70674083756121, 4.607625571215378, 5.677858158016438], [17.34676875755316, 
10.130528920703508, 4.483049170020872], [2.9330861621787743, 3.3990818416373494, 0.720770788622487], [22.67189915206641, 9.23882668038352, 8.661796350384211], [15.54501705742674, 14.931708899088871, 4.905010140501105], [20.180891240581246, 11.991517760259551, 0.36399634878062614], [13.537900990107627, 8.71222318139275, 4.81955270950513], [13.02082403030889, 3.7798366294145125, 3.8744821907763676], [12.738608267554484, 13.15125952920471, 8.605595280531846], [9.30512423974256, 4.003262597986021, 2.08391144947309], [13.66172535110934, 6.786427797477926, 9.451058899918706], [19.297496722626608, 6.66303528741421, 9.65403361924748], [16.364750768476505, 11.479160632545504, 9.561987519221761], [16.965060879285595, 12.039276627942046, 3.4048076978088133], [9.872075532499599, 8.868306222192839, 9.697764141289875], [23.093789800187132, 7.8655671087878325, 11.819108411864843], [15.963054660441902, 12.957563889928995, 5.796919712452191], [20.789096532494103, 7.4060484208004835, 5.418655862348822], [20.271838371834924, 15.445983071791856, 11.893743962525676], [20.235785459686173, 13.946169611391733, 9.533785527794883], [5.468638782470736, 4.963675408702813, 4.207234520017469], [13.30165782031905, 14.29182491089219, 11.848895929341783], [13.860085269446175, 5.316906102226919, 5.113333629612867], [12.690982408563865, 7.210540402496312, 3.452020777408105], [26.695527891724396, 14.913992347710204, 9.505586002333807], [6.71805210100351, 7.061748291218562, 4.294338782243351], [20.991757417986378, 15.139143269943215, 9.631524849633491], [19.51051107137078, 14.241804768445284, 11.863723702327984], [19.467115888926717, 5.338714780974876, 5.342773829476735], [17.04442989892783, 11.179573262577135, 4.5377135674632525], [9.75214492091448, 7.909665520118783, 11.856922309351365], [23.22825354947115, 8.805217731024735, 9.58442235219589], [16.00517946832365, 13.88422938185283, 4.718010810191537], [27.480789782170447, 15.480087960917762, 11.617172959535212], [13.389347998136012, 
7.596486021826197, 4.594226402069221], [13.123855701056296, 4.872440299211145, 4.040302647886903], [12.64753966896389, 13.717491998222464, 9.492966517597923], [20.97530038245366, 8.335251317448973, 10.600423519640026], [16.502636637988203, 10.85079294806207, 6.900496977690235], [9.019607067373974, 1.3080109208445687, 2.1730394493971033], [12.07299373071628, 8.404520741314311, 10.81235772773731], [18.747600350981866, 12.258841111031488, 10.56065092033455], [7.015172452108002, 5.4262894076561405, 2.347590240376888], [18.78251160034123, 7.0857013283180255, 6.946566060828641], [14.224469007695566, 4.466485050187827, 10.705760127563483], [11.603451262170989, 5.349451807744306, 2.0915175410667244], [18.543838401175798, 4.375079148751448, 10.585231910862703], [14.160667831243819, 12.098710870007285, 10.795787760496086], [14.45018544044642, 7.011509947840652, 6.832073401261807], [16.477007379156557, 13.350335720646678, 3.479800617715256], [10.582432902092235, 8.377782469927169, 10.803729958995012], [22.44315777833504, 8.370354822180708, 10.648120637633818], [16.567659589367246, 11.689973911016514, 5.702162918040675], [19.752709342190204, 6.556676208576273, 5.912505297587319], [20.937902942140084, 15.937436540790538, 10.817725992408736], [19.488886710337034, 13.540165709815675, 10.668618978356523], [6.371529009087222, 5.8630806488390075, 3.652784530849877], [13.287113670442396, 13.309066380643525, 10.810269250499239], [13.887372022428522, 6.66488149144997, 5.501186858702026], [12.488596287754628, 5.853559681882998, 3.200679859004045], [26.74856971012843, 15.90075025114669, 10.529236108850316], [10.08315746894864, 1.501050200458931, 1.573941101482483], [12.62721678223712, 8.955703669760464, 9.832552292079287], [20.39919235012287, 7.4641321602901884, 11.35141521129136], [15.34144808796582, 10.482235049392207, 7.284668181164747], [19.016384691239136, 8.259448068479994, 7.407999661220768], [13.965549620748263, 5.310837048175544, 11.629395661906845], [18.964757008970206, 
11.57454425853002, 9.498296919639932], [6.724169659438005, 4.266531959190234, 1.8722045706136907], [14.989309002358754, 11.848517668179902, 11.724244051601735], [15.200359309885917, 6.086405481062033, 7.400205812261053], [10.683892382055284, 6.184519761996729, 1.7395703404079805], [17.662435367568307, 4.527759521878119, 9.64210249881064], [7.857986041984485, 6.202923889200398, 1.7950802519056281], [14.981497208999828, 4.615121609824532, 9.745747168438765], [17.887354642477394, 12.0129465321045, 11.494403179921116], [17.806453279094754, 6.309886859779258, 7.194991281595532], [17.65884499015476, 10.488278242553303, 7.416379002429795], [12.548166448986402, 7.693791677648976, 11.776895469518676], [20.34393939120336, 9.206817729894354, 9.922408032378584], [7.857463468905509, 1.74909233925955, 1.8274196418121598], [18.857763430781485, 5.182306608404846, 11.489619301518633], [14.163844810128705, 8.142657329873755, 7.360230728587728], [11.792894920225645, 4.155462409412462, 1.7001921686281716], [13.835964747759567, 11.264146337910587, 9.884038668157677], [18.442882359243793, 7.121400257955315, 9.800715117804442], [16.392129968717537, 10.582139510371382, 9.857218178507331], [9.302433061412287, 3.9652007015997683, 1.0673996577713942], [14.478398449342821, 7.162072897735833, 9.816748197723141], [15.11113884153613, 9.160319431120458, 11.201390619831347], [16.44457564782678, 6.551160139418165, 11.181147839950713], [16.50639593935091, 8.350138210113135, 8.738223430491407], [18.033642210935266, 9.122422759734661, 11.23846663261546], [10.893906920444666, 2.90658841770915, -0.019051977695474183], [14.757549238860731, 9.308555112561303, 9.214183379238824], [18.225880348003468, 9.344784918068, 9.180851999025142], [7.388359137923477, 3.030521667505481, 0.14692076074094484], [9.220983449272417, 6.075785767588413, 0.00657883162539968], [16.440728831136994, 6.2934328881825925, 9.187973671216682]], 'forces': [[0.03505596579759337, 0.7596797943958487, 0.9211044616269563], 
[0.16925367694563342, -0.01943702713953078, 0.38893903196958485], [0.01574595116377608, 0.17132290092535438, -0.1999552221020049], [0.6313750521363777, 0.05251478601615336, -0.8064430222079316], [-0.09833287623511343, -0.138000887230052, -0.08874934559146055], [0.19781246456634455, -0.9287673780647797, -1.0439826331463689], [0.07987955323902354, 0.3227860853196942, 1.7840037712935266], [0.3716884711227413, 0.4696845328184121, 0.370453313071228], [-0.14715298673081575, -0.28619517081945, -0.2515490388965677], [-0.019879256916508915, 0.1586797572898179, 0.03203954734206577], [0.9120773177492224, -1.200046035662623, 1.3240873743396222], [-0.5694124897336902, -0.12506360937075797, -1.3829512429794373], [-0.0348132536409263, 0.07054439117941769, 0.5526864339711696], [-0.41634750794948194, 0.12174380071939654, 1.3701661744387312], [1.1617998448227365, 0.7015918847484289, -1.3710321220485349], [0.19070902021846559, 0.8070636865577138, 0.18004202662121627], [0.22735084256512936, 0.16909786808234928, -0.1733360748519467], [-0.4446707825029187, -0.3087082660123333, -0.10186968600959667], [1.0559542045857038, -0.7522446777152361, 1.3649098107407422], [0.7762546692811232, -1.6189836731314526, -0.7746117342375823], [0.028996903632322036, -0.4549212573567351, -0.15349589870642655], [-0.30669869163043734, -1.474741689618629, -0.22373535702799768], [-0.0288004713360364, 0.7124475973319003, -0.205686211479239], [-0.009475030082091964, -0.9498180296696098, -0.7324728929189461], [0.024624999488289372, -0.06313755663570486, -1.801891965772292], [-0.06351139506345264, -0.159376012078223, -0.050473844173332186], [-0.27379062557521333, 0.16125086064434194, -0.24263708044911655], [0.16200265126520474, 1.5060489867253262, -0.5510645819751359], [0.11499568285511277, -1.536591123473717, -0.46194088373525655], [-0.3656006125998993, -0.17292778363924827, -1.3654302020596978], [1.3289642431824835, 0.5107142001499065, -0.8944297785444403], [-0.025697149587125892, 1.0899791579381328, 
0.0022805686754628165], [-0.7954947499036974, 0.11066234526139797, 1.0548332035231311], [-1.8548078439117515, 0.38714131650410166, 2.989239836187592], [0.4506861359111631, 1.1364487657471294, -0.6025277867192889], [-1.2287668027946692, -0.09796263735206197, 0.2020090194615987], [1.0444392611021323, -2.684139342402327, -1.0437898003947796], [-0.07301933526848252, -0.6708481734970433, 0.9851917838418224], [-0.8675355232354481, 0.9083975546282277, -1.1112478391074265], [0.1290364782741685, 1.0164497157641128, 0.008965437397225301], [0.2684093062541945, -0.321198686108611, -1.4872871313266933], [-0.643392389215423, -0.898736890883939, -1.098486424717582], [-0.03591728542135892, 0.6514399427150831, 0.9283400606872579], [0.8447910287406576, -1.5317872739661245, -1.3569157361908561], [0.17489879147082638, 0.8299521628403577, -1.920810638125738], [0.3438521635446514, 0.034021868028403804, 0.7196574853586566], [-0.49089716393457716, 1.32019318119886, -0.94368029174169], [-0.8017672136473943, -0.4383576953262203, 0.7052387377464858], [-0.5808395014825368, 3.6184937280564875, 2.299079591174048], [-0.046264433761305436, 2.3374147421900013, 0.42705255387637825], [-0.40195909935689456, 0.5967109224900137, -0.9082941962733758], [0.545840099958743, 0.6951430433146117, 4.287385291900738], [-0.6422225371891145, 0.3670378593750782, 0.6300133957998216], [1.0342078124131395, -1.450620626387826, 0.9064692271123347], [-1.6097744951362183, 0.26370058757071924, 1.6795418988818989], [-1.71670514098886, -1.4817345765222634, 0.997617412133566], [-0.36574665127043626, 0.3369549216858052, -0.5080099136416182], [0.25302742332541905, -0.05521598720041631, 0.07509935788229247], [0.4694140527455162, -0.5513201496485756, -0.6341384740218201], [-0.9458055657745397, -0.43950337898099623, 2.0890692980731704], [-0.10420990428288447, 1.1152443621616988, 0.8470916520222858], [0.303680730512897, -1.6201972339147879, -2.1682659662612322], [-1.573935371256837, -0.7716241121396255, 0.4766830761494265], 
[0.8572325978731995, -0.38723439044553537, 0.7948401469896356], [-0.2134458014031925, 0.03674209537749023, 0.28632115488382165], [-2.3274044083894974, 0.6730330971277177, -0.3481623612081755], [0.898834592811411, -1.0161931196493315, 0.970241846276607], [-1.0154469854558483, 0.8937227251219462, 1.1335331345430992], [-0.24175827732254024, 1.3876321937466838, 0.534561183763117], [0.6898959355886991, -1.1971756558776325, -2.6977317373963534], [0.3469128049778768, -0.8706239325849023, -0.7535286867304852], [0.5164133078455445, -1.1737287359463862, -0.8774512404325054], [0.07070174270471455, 2.0437350895055317, -2.2527981881306025], [0.3288256070994712, -0.3856912142121501, -0.4543299035851946], [0.46220673582211436, -1.5513682828935087, 0.2903063650833339], [-0.38469619721394926, -2.2885375531998284, -0.3758814264732991], [1.9752635218505312, 0.3340079230208498, -1.0452476159967947], [0.8562020996487061, 0.4789574741768384, -0.4695076409076208], [-0.07918329845061843, -0.20133898992740973, 0.4222739011816598], [0.4557039012178522, 0.3706605440016026, 0.8159874720805957], [-0.05972518826358055, -0.10986736010417913, 0.6435188875004655], [0.0024615743516213087, 0.5677505285253259, 0.5866964749241426], [-0.5868764521589592, -0.8929930459899325, 1.0299773189536057], [-0.19385399384171936, 0.13704546522351088, 0.0855992297614977], [0.5114222820137425, 0.9816344052405028, 0.021354041800663897], [-1.066992979728017, 1.3317744591489784, -0.48617301863097473], [0.22686850357582064, -0.6558709822362698, -0.6488395287823175], [0.7797446849745541, 0.08798264257114148, 0.9232755012995165], [0.23828317402856555, 0.1996127511351823, 0.16316684686413324], [1.8341906803324939, 0.7465810512458905, -1.8839816394548647], [0.25763432631605526, -0.3878936213456354, -0.15371958469827013], [-0.09513905164812367, 0.4332483987401102, 1.2322542473847458], [-1.7633197590270517, 0.7325978485419761, -0.6597173528546378], [1.5710907025053915, 0.6496761658138797, -0.2693559865093303], 
[0.13329885341529846, -0.16092278785630468, -0.8911017423623445], [0.4560890525003599, -0.12238914766138208, 0.02436686071149516], [0.04387896406897804, 0.4783136698968656, 0.7284398601970398], [-0.19204033753543812, -0.5498325092476479, -0.4268478940493581], [-0.04005727604281352, -0.4718303756780979, -0.6626314413966554], [0.5963838781433183, -0.31271095971465634, -1.0048920919648563], [0.2392545368759047, -0.07763909377589129, -0.3403374652590171], [1.0458302280169298, -0.7758582051438556, -0.16412124042933254], [-1.0646861857983383, 0.17288613176490497, 1.4279167553260372], [0.310471014472195, 0.4872791072940909, 0.18266249516014715], [-7.301933526848252e-05, -1.2856617204855898, 1.537077576228393], [0.23109436904931635, 0.627365159344662, 1.1682219467816664], [0.06169413921247506, -0.7133752514222126, 0.5373045510423942], [-0.3402017110018982, -0.024654310066530946, -0.18435993759480393], [-0.14411137146241382, 1.2003674235819386, 0.056440860838511554], [-1.1119034704628301, 0.02676570014126608, -0.4762907257775261], [0.753222725431297, 0.3270011521591009, -0.33560457820400924], [-0.7786571082555904, 0.413619053069661, -1.222248027349609], [-0.5719985054876705, -0.1103018765710937, 0.8759049788750947], [-0.24311736255574165, -0.40464795924505575, -1.3254814265784451]], 'energy': -30848.841105643754, 'volume': 2273.382588904185, 'elements': {'1': 28, '6': 48, '8': 32, '40': 6}, 'username': 'ubuntu', 'uploaded': '2023-09-06T12:30:31.588024', 'modified': '2023-09-06T12:30:31.588031', 'hash_structure': '913be2ca3a0e3c584cc728f4c359c850', 'hash': '3768575c18d7b609556913d562d87d36', 'derived': {'arrays_keys': ['forces', 'numbers', 'positions'], 'info_keys': ['n_atoms', 'cell', 'pbc', 'formula', 'energy', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n", - "CPU times: user 117 ms, sys: 0 ns, total: 117 ms\n", - "Wall time: 144 ms\n" + "114\n", + "CPU times: user 133 ms, sys: 7.48 
ms, total: 140 ms\n", + "Wall time: 172 ms\n" ] } ], @@ -758,9 +1034,7 @@ "%%time\n", "os_query = 'modified: [* TO 2023-09-06T12:30:32.0000001]'\n", "print(os_abcd.count(os_query))\n", - "for items in list(os_abcd.get_items(os_query)):\n", - " print(items)\n", - " break" + "print(next(os_abcd.get_items(os_query))[\"n_atoms\"])" ] }, { @@ -782,9 +1056,9 @@ "output_type": "stream", "text": [ "152\n", - "{'_id': ObjectId('64f5bd3babc8fc69d4b2938f'), 'n_atoms': 114, 'cell': [[14.759483662029265, 0.0, 0.0], [7.380258413807584, 12.781786651387147, 0.0], [7.380243655055182, 4.260782501715179, 12.050631347394049]], 'pbc': [True, True, True], 'formula': 'C48H28O32Zr6', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 40, 40, 40, 40, 40, 40], 'positions': [[17.166810638040264, 11.566799628342661, 2.3959431306453296], [10.391931260040497, 9.232075241735581, 8.799170748954813], [15.152442318761134, 3.2144705981189303, 0.6236271192356346], [15.428455018627362, 13.198368239182761, 6.757442369774353], [20.968952462595865, 8.354501228588285, 5.937790321351722], [12.821718368988067, 11.860905590260213, 0.764468940894911], [20.164574198879585, 13.449131931085539, 8.500504258460039], [5.203325638335655, 4.037525599970674, 3.6535544413570706], [6.476452578322519, 9.882112891744764, 0.7336632917566172], [14.332783438660714, 4.5739237510789845, 5.763830060388294], [12.20845295758527, 7.975607890442319, 2.7181563401019804], [26.126453831046035, 15.25865575215541, 8.681035572143871], [7.431639790543854, 7.68880010777489, 3.739705967641281], [21.52510600020679, 15.432405681052952, 8.675468268048236], [11.49107468172553, 9.60164215963523, 0.7009214784567679], [18.70674083756121, 4.607625571215378, 
5.677858158016438], [17.34676875755316, 10.130528920703508, 4.483049170020872], [2.9330861621787743, 3.3990818416373494, 0.720770788622487], [22.67189915206641, 9.23882668038352, 8.661796350384211], [15.54501705742674, 14.931708899088871, 4.905010140501105], [20.180891240581246, 11.991517760259551, 0.36399634878062614], [13.537900990107627, 8.71222318139275, 4.81955270950513], [13.02082403030889, 3.7798366294145125, 3.8744821907763676], [12.738608267554484, 13.15125952920471, 8.605595280531846], [9.30512423974256, 4.003262597986021, 2.08391144947309], [13.66172535110934, 6.786427797477926, 9.451058899918706], [19.297496722626608, 6.66303528741421, 9.65403361924748], [16.364750768476505, 11.479160632545504, 9.561987519221761], [16.965060879285595, 12.039276627942046, 3.4048076978088133], [9.872075532499599, 8.868306222192839, 9.697764141289875], [23.093789800187132, 7.8655671087878325, 11.819108411864843], [15.963054660441902, 12.957563889928995, 5.796919712452191], [20.789096532494103, 7.4060484208004835, 5.418655862348822], [20.271838371834924, 15.445983071791856, 11.893743962525676], [20.235785459686173, 13.946169611391733, 9.533785527794883], [5.468638782470736, 4.963675408702813, 4.207234520017469], [13.30165782031905, 14.29182491089219, 11.848895929341783], [13.860085269446175, 5.316906102226919, 5.113333629612867], [12.690982408563865, 7.210540402496312, 3.452020777408105], [26.695527891724396, 14.913992347710204, 9.505586002333807], [6.71805210100351, 7.061748291218562, 4.294338782243351], [20.991757417986378, 15.139143269943215, 9.631524849633491], [19.51051107137078, 14.241804768445284, 11.863723702327984], [19.467115888926717, 5.338714780974876, 5.342773829476735], [17.04442989892783, 11.179573262577135, 4.5377135674632525], [9.75214492091448, 7.909665520118783, 11.856922309351365], [23.22825354947115, 8.805217731024735, 9.58442235219589], [16.00517946832365, 13.88422938185283, 4.718010810191537], [27.480789782170447, 15.480087960917762, 
11.617172959535212], [13.389347998136012, 7.596486021826197, 4.594226402069221], [13.123855701056296, 4.872440299211145, 4.040302647886903], [12.64753966896389, 13.717491998222464, 9.492966517597923], [20.97530038245366, 8.335251317448973, 10.600423519640026], [16.502636637988203, 10.85079294806207, 6.900496977690235], [9.019607067373974, 1.3080109208445687, 2.1730394493971033], [12.07299373071628, 8.404520741314311, 10.81235772773731], [18.747600350981866, 12.258841111031488, 10.56065092033455], [7.015172452108002, 5.4262894076561405, 2.347590240376888], [18.78251160034123, 7.0857013283180255, 6.946566060828641], [14.224469007695566, 4.466485050187827, 10.705760127563483], [11.603451262170989, 5.349451807744306, 2.0915175410667244], [18.543838401175798, 4.375079148751448, 10.585231910862703], [14.160667831243819, 12.098710870007285, 10.795787760496086], [14.45018544044642, 7.011509947840652, 6.832073401261807], [16.477007379156557, 13.350335720646678, 3.479800617715256], [10.582432902092235, 8.377782469927169, 10.803729958995012], [22.44315777833504, 8.370354822180708, 10.648120637633818], [16.567659589367246, 11.689973911016514, 5.702162918040675], [19.752709342190204, 6.556676208576273, 5.912505297587319], [20.937902942140084, 15.937436540790538, 10.817725992408736], [19.488886710337034, 13.540165709815675, 10.668618978356523], [6.371529009087222, 5.8630806488390075, 3.652784530849877], [13.287113670442396, 13.309066380643525, 10.810269250499239], [13.887372022428522, 6.66488149144997, 5.501186858702026], [12.488596287754628, 5.853559681882998, 3.200679859004045], [26.74856971012843, 15.90075025114669, 10.529236108850316], [10.08315746894864, 1.501050200458931, 1.573941101482483], [12.62721678223712, 8.955703669760464, 9.832552292079287], [20.39919235012287, 7.4641321602901884, 11.35141521129136], [15.34144808796582, 10.482235049392207, 7.284668181164747], [19.016384691239136, 8.259448068479994, 7.407999661220768], [13.965549620748263, 5.310837048175544, 
11.629395661906845], [18.964757008970206, 11.57454425853002, 9.498296919639932], [6.724169659438005, 4.266531959190234, 1.8722045706136907], [14.989309002358754, 11.848517668179902, 11.724244051601735], [15.200359309885917, 6.086405481062033, 7.400205812261053], [10.683892382055284, 6.184519761996729, 1.7395703404079805], [17.662435367568307, 4.527759521878119, 9.64210249881064], [7.857986041984485, 6.202923889200398, 1.7950802519056281], [14.981497208999828, 4.615121609824532, 9.745747168438765], [17.887354642477394, 12.0129465321045, 11.494403179921116], [17.806453279094754, 6.309886859779258, 7.194991281595532], [17.65884499015476, 10.488278242553303, 7.416379002429795], [12.548166448986402, 7.693791677648976, 11.776895469518676], [20.34393939120336, 9.206817729894354, 9.922408032378584], [7.857463468905509, 1.74909233925955, 1.8274196418121598], [18.857763430781485, 5.182306608404846, 11.489619301518633], [14.163844810128705, 8.142657329873755, 7.360230728587728], [11.792894920225645, 4.155462409412462, 1.7001921686281716], [13.835964747759567, 11.264146337910587, 9.884038668157677], [18.442882359243793, 7.121400257955315, 9.800715117804442], [16.392129968717537, 10.582139510371382, 9.857218178507331], [9.302433061412287, 3.9652007015997683, 1.0673996577713942], [14.478398449342821, 7.162072897735833, 9.816748197723141], [15.11113884153613, 9.160319431120458, 11.201390619831347], [16.44457564782678, 6.551160139418165, 11.181147839950713], [16.50639593935091, 8.350138210113135, 8.738223430491407], [18.033642210935266, 9.122422759734661, 11.23846663261546], [10.893906920444666, 2.90658841770915, -0.019051977695474183], [14.757549238860731, 9.308555112561303, 9.214183379238824], [18.225880348003468, 9.344784918068, 9.180851999025142], [7.388359137923477, 3.030521667505481, 0.14692076074094484], [9.220983449272417, 6.075785767588413, 0.00657883162539968], [16.440728831136994, 6.2934328881825925, 9.187973671216682]], 'forces': [[0.03505596579759337, 
0.7596797943958487, 0.9211044616269563], [0.16925367694563342, -0.01943702713953078, 0.38893903196958485], [0.01574595116377608, 0.17132290092535438, -0.1999552221020049], [0.6313750521363777, 0.05251478601615336, -0.8064430222079316], [-0.09833287623511343, -0.138000887230052, -0.08874934559146055], [0.19781246456634455, -0.9287673780647797, -1.0439826331463689], [0.07987955323902354, 0.3227860853196942, 1.7840037712935266], [0.3716884711227413, 0.4696845328184121, 0.370453313071228], [-0.14715298673081575, -0.28619517081945, -0.2515490388965677], [-0.019879256916508915, 0.1586797572898179, 0.03203954734206577], [0.9120773177492224, -1.200046035662623, 1.3240873743396222], [-0.5694124897336902, -0.12506360937075797, -1.3829512429794373], [-0.0348132536409263, 0.07054439117941769, 0.5526864339711696], [-0.41634750794948194, 0.12174380071939654, 1.3701661744387312], [1.1617998448227365, 0.7015918847484289, -1.3710321220485349], [0.19070902021846559, 0.8070636865577138, 0.18004202662121627], [0.22735084256512936, 0.16909786808234928, -0.1733360748519467], [-0.4446707825029187, -0.3087082660123333, -0.10186968600959667], [1.0559542045857038, -0.7522446777152361, 1.3649098107407422], [0.7762546692811232, -1.6189836731314526, -0.7746117342375823], [0.028996903632322036, -0.4549212573567351, -0.15349589870642655], [-0.30669869163043734, -1.474741689618629, -0.22373535702799768], [-0.0288004713360364, 0.7124475973319003, -0.205686211479239], [-0.009475030082091964, -0.9498180296696098, -0.7324728929189461], [0.024624999488289372, -0.06313755663570486, -1.801891965772292], [-0.06351139506345264, -0.159376012078223, -0.050473844173332186], [-0.27379062557521333, 0.16125086064434194, -0.24263708044911655], [0.16200265126520474, 1.5060489867253262, -0.5510645819751359], [0.11499568285511277, -1.536591123473717, -0.46194088373525655], [-0.3656006125998993, -0.17292778363924827, -1.3654302020596978], [1.3289642431824835, 0.5107142001499065, -0.8944297785444403], 
[-0.025697149587125892, 1.0899791579381328, 0.0022805686754628165], [-0.7954947499036974, 0.11066234526139797, 1.0548332035231311], [-1.8548078439117515, 0.38714131650410166, 2.989239836187592], [0.4506861359111631, 1.1364487657471294, -0.6025277867192889], [-1.2287668027946692, -0.09796263735206197, 0.2020090194615987], [1.0444392611021323, -2.684139342402327, -1.0437898003947796], [-0.07301933526848252, -0.6708481734970433, 0.9851917838418224], [-0.8675355232354481, 0.9083975546282277, -1.1112478391074265], [0.1290364782741685, 1.0164497157641128, 0.008965437397225301], [0.2684093062541945, -0.321198686108611, -1.4872871313266933], [-0.643392389215423, -0.898736890883939, -1.098486424717582], [-0.03591728542135892, 0.6514399427150831, 0.9283400606872579], [0.8447910287406576, -1.5317872739661245, -1.3569157361908561], [0.17489879147082638, 0.8299521628403577, -1.920810638125738], [0.3438521635446514, 0.034021868028403804, 0.7196574853586566], [-0.49089716393457716, 1.32019318119886, -0.94368029174169], [-0.8017672136473943, -0.4383576953262203, 0.7052387377464858], [-0.5808395014825368, 3.6184937280564875, 2.299079591174048], [-0.046264433761305436, 2.3374147421900013, 0.42705255387637825], [-0.40195909935689456, 0.5967109224900137, -0.9082941962733758], [0.545840099958743, 0.6951430433146117, 4.287385291900738], [-0.6422225371891145, 0.3670378593750782, 0.6300133957998216], [1.0342078124131395, -1.450620626387826, 0.9064692271123347], [-1.6097744951362183, 0.26370058757071924, 1.6795418988818989], [-1.71670514098886, -1.4817345765222634, 0.997617412133566], [-0.36574665127043626, 0.3369549216858052, -0.5080099136416182], [0.25302742332541905, -0.05521598720041631, 0.07509935788229247], [0.4694140527455162, -0.5513201496485756, -0.6341384740218201], [-0.9458055657745397, -0.43950337898099623, 2.0890692980731704], [-0.10420990428288447, 1.1152443621616988, 0.8470916520222858], [0.303680730512897, -1.6201972339147879, -2.1682659662612322], [-1.573935371256837, 
-0.7716241121396255, 0.4766830761494265], [0.8572325978731995, -0.38723439044553537, 0.7948401469896356], [-0.2134458014031925, 0.03674209537749023, 0.28632115488382165], [-2.3274044083894974, 0.6730330971277177, -0.3481623612081755], [0.898834592811411, -1.0161931196493315, 0.970241846276607], [-1.0154469854558483, 0.8937227251219462, 1.1335331345430992], [-0.24175827732254024, 1.3876321937466838, 0.534561183763117], [0.6898959355886991, -1.1971756558776325, -2.6977317373963534], [0.3469128049778768, -0.8706239325849023, -0.7535286867304852], [0.5164133078455445, -1.1737287359463862, -0.8774512404325054], [0.07070174270471455, 2.0437350895055317, -2.2527981881306025], [0.3288256070994712, -0.3856912142121501, -0.4543299035851946], [0.46220673582211436, -1.5513682828935087, 0.2903063650833339], [-0.38469619721394926, -2.2885375531998284, -0.3758814264732991], [1.9752635218505312, 0.3340079230208498, -1.0452476159967947], [0.8562020996487061, 0.4789574741768384, -0.4695076409076208], [-0.07918329845061843, -0.20133898992740973, 0.4222739011816598], [0.4557039012178522, 0.3706605440016026, 0.8159874720805957], [-0.05972518826358055, -0.10986736010417913, 0.6435188875004655], [0.0024615743516213087, 0.5677505285253259, 0.5866964749241426], [-0.5868764521589592, -0.8929930459899325, 1.0299773189536057], [-0.19385399384171936, 0.13704546522351088, 0.0855992297614977], [0.5114222820137425, 0.9816344052405028, 0.021354041800663897], [-1.066992979728017, 1.3317744591489784, -0.48617301863097473], [0.22686850357582064, -0.6558709822362698, -0.6488395287823175], [0.7797446849745541, 0.08798264257114148, 0.9232755012995165], [0.23828317402856555, 0.1996127511351823, 0.16316684686413324], [1.8341906803324939, 0.7465810512458905, -1.8839816394548647], [0.25763432631605526, -0.3878936213456354, -0.15371958469827013], [-0.09513905164812367, 0.4332483987401102, 1.2322542473847458], [-1.7633197590270517, 0.7325978485419761, -0.6597173528546378], [1.5710907025053915, 
0.6496761658138797, -0.2693559865093303], [0.13329885341529846, -0.16092278785630468, -0.8911017423623445], [0.4560890525003599, -0.12238914766138208, 0.02436686071149516], [0.04387896406897804, 0.4783136698968656, 0.7284398601970398], [-0.19204033753543812, -0.5498325092476479, -0.4268478940493581], [-0.04005727604281352, -0.4718303756780979, -0.6626314413966554], [0.5963838781433183, -0.31271095971465634, -1.0048920919648563], [0.2392545368759047, -0.07763909377589129, -0.3403374652590171], [1.0458302280169298, -0.7758582051438556, -0.16412124042933254], [-1.0646861857983383, 0.17288613176490497, 1.4279167553260372], [0.310471014472195, 0.4872791072940909, 0.18266249516014715], [-7.301933526848252e-05, -1.2856617204855898, 1.537077576228393], [0.23109436904931635, 0.627365159344662, 1.1682219467816664], [0.06169413921247506, -0.7133752514222126, 0.5373045510423942], [-0.3402017110018982, -0.024654310066530946, -0.18435993759480393], [-0.14411137146241382, 1.2003674235819386, 0.056440860838511554], [-1.1119034704628301, 0.02676570014126608, -0.4762907257775261], [0.753222725431297, 0.3270011521591009, -0.33560457820400924], [-0.7786571082555904, 0.413619053069661, -1.222248027349609], [-0.5719985054876705, -0.1103018765710937, 0.8759049788750947], [-0.24311736255574165, -0.40464795924505575, -1.3254814265784451]], 'energy': -30848.841105643754, 'volume': 2273.382588904185, 'elements': {'1': 28, '6': 48, '8': 32, '40': 6}, 'username': 'ubuntu', 'uploaded': datetime.datetime(2023, 9, 4, 11, 19, 23, 722000), 'modified': datetime.datetime(2023, 9, 4, 11, 19, 23, 722000), 'hash_structure': '913be2ca3a0e3c584cc728f4c359c850', 'hash': '919ff4a70b553ddf39772a76d202ebce', 'derived': {'arrays_keys': ['numbers', 'positions', 'forces'], 'info_keys': ['formula', 'pbc', 'n_atoms', 'energy', 'cell', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n", - "CPU times: user 16.2 ms, sys: 1.81 ms, 
total: 18 ms\n", - "Wall time: 586 ms\n" + "114\n", + "CPU times: user 17.8 ms, sys: 3.97 ms, total: 21.8 ms\n", + "Wall time: 688 ms\n" ] } ], @@ -794,9 +1068,57 @@ " 'modified': {'$lt': datetime.fromisoformat('2023-09-04T11:19:24.310')}\n", "}\n", "print(mongo_abcd.count(mongo_query))\n", - "for items in list(mongo_abcd.get_items(mongo_query)):\n", - " print(items)\n", - " break" + "print(next(mongo_abcd.get_items(mongo_query))[\"n_atoms\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "109\n", + "2350.374085846946\n", + "CPU times: user 141 ms, sys: 22.5 ms, total: 164 ms\n", + "Wall time: 194 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "os_query = 'author: Jan* AND Metal: Ag AND Space_group: Pbca'\n", + "print(os_abcd.count(os_query))\n", + "print(next(os_abcd.get_atoms(os_query)).get_volume())" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "109\n", + "2439.0408078794635\n", + "CPU times: user 32.7 ms, sys: 7.99 ms, total: 40.7 ms\n", + "Wall time: 1.25 s\n" + ] + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " \"author\": {'$regex': 'Jan*'},\n", + " \"Metal\": \"Ag\",\n", + " \"Space_group\": \"Pbca\"\n", + "}\n", + "print(mongo_abcd.count(mongo_query))\n", + "print(next(mongo_abcd.get_atoms(mongo_query)).get_volume())" ] } ], @@ -816,7 +1138,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.10.13" }, "orig_nbformat": 4 }, From dbeed8a7bef120e3e6006da5f689782cf3885ea0 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Fri, 12 Jan 2024 17:06:37 +0000 Subject: [PATCH 047/112] Fix OpenSearch CLI queries Also changes implementation for MongoDB to enable lists of queries in for non-CLI use. 
--- abcd/backends/atoms_opensearch.py | 13 +++++++++++-- abcd/backends/atoms_pymongo.py | 9 +++++++++ abcd/frontends/commandline/decorators.py | 20 +++++++------------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index a8e50506..1645f173 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -78,13 +78,13 @@ def __init__( else: self.query_builder = ElasticsearchQueryBuilder() - def __call__(self, query: Union[dict, str, None]) -> Union[dict, None]: + def __call__(self, query: Union[dict, str, list, None]) -> Union[dict, None]: """ Parses and builds queries from strings using ElasticsearchQueryBuilder. Parameters ---------- - query: Union[dict, str, None] + query: Union[dict, str, list, None] Query to be parsed for OpenSearch. If given as a dictionary, the query is left unchanged. If given as a string, the ElasticsearchQueryBuilder is used to build the query. @@ -104,6 +104,15 @@ def __call__(self, query: Union[dict, str, None]) -> Union[dict, None]: elif isinstance(query, str): tree = parser.parse(query) return self.query_builder(tree) + elif isinstance(query, list): + if len(query) == 0: + return None + elif query[0] is None: + return None + separator = " AND " + joined_query = separator.join(query) + tree = parser.parse(joined_query) + return self.query_builder(tree) return query if query else None diff --git a/abcd/backends/atoms_pymongo.py b/abcd/backends/atoms_pymongo.py index ac702fcb..a34cc7d3 100644 --- a/abcd/backends/atoms_pymongo.py +++ b/abcd/backends/atoms_pymongo.py @@ -133,6 +133,15 @@ def __call__(self, ast): p = parser(ast) return self.visit(p) + elif isinstance(ast, list): + from abcd.parsers.queries import parser + + if len(ast) == 0: + return {} + else: + ast = ("AND", *[parser(q) for q in ast]) + return self.visit(ast) + return self.visit(ast) if ast else {} diff --git a/abcd/frontends/commandline/decorators.py 
b/abcd/frontends/commandline/decorators.py index 8fb37499..201f6000 100644 --- a/abcd/frontends/commandline/decorators.py +++ b/abcd/frontends/commandline/decorators.py @@ -1,8 +1,7 @@ import logging - +import functools from abcd import ABCD from abcd.frontends.commandline.config import Config -from abcd.parsers.queries import parser logger = logging.getLogger(__name__) @@ -10,6 +9,7 @@ def init_config(func): config = Config.load() + @functools.wraps(func) def wrapper(*args, **kwargs): func(*args, config=config, **kwargs) @@ -17,6 +17,7 @@ def wrapper(*args, **kwargs): def init_db(func): + @functools.wraps(func) def wrapper(*args, config, **kwargs): url = config.get("url", None) @@ -27,29 +28,22 @@ def wrapper(*args, config, **kwargs): db = ABCD.from_url(url=url) # TODO: AST.from_string() ?! - # TODO: parser should accept list # TODO: better ast optimisation query_list = [] for q in kwargs.pop("default_query", []): - query_list.append(parser(q)) + query_list.append(q) for q in kwargs.pop("query", []): - query_list.append(parser(q)) - - if not query_list: - query = None - elif len(query_list) == 1: - query = query_list[0] - else: - query = ("AND", *query_list) + query_list.append(q) - func(*args, db=db, query=query, **kwargs) + func(*args, db=db, query=query_list, **kwargs) return wrapper def check_remote(func): + @functools.wraps(func) def wrapper(*args, **kwargs): if kwargs.pop("remote"): print("In read only mode, you can't modify the data in the database") From 6c303028af3533afe50eba3183c71bf3b5ab4ecd Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 22 Jan 2024 15:15:11 +0000 Subject: [PATCH 048/112] Tidy unit tests --- tests/{ => data}/examples.csv | 0 tests/mongo_mock.py | 52 +++++++++++++++ tests/opensearch.py | 58 ++++------------ tests/{database.py => opensearch_mock.py} | 81 ++++++----------------- tests/properties.py | 10 +-- 5 files changed, 90 insertions(+), 111 deletions(-) rename tests/{ => data}/examples.csv (100%) create mode 100644 
tests/mongo_mock.py rename tests/{database.py => opensearch_mock.py} (74%) diff --git a/tests/examples.csv b/tests/data/examples.csv similarity index 100% rename from tests/examples.csv rename to tests/data/examples.csv diff --git a/tests/mongo_mock.py b/tests/mongo_mock.py new file mode 100644 index 00000000..5d583fdc --- /dev/null +++ b/tests/mongo_mock.py @@ -0,0 +1,52 @@ +from io import StringIO +import logging +import unittest + +from ase.io import read +from ase.atoms import Atoms +import mongomock + +from abcd import ABCD + + +class MongoMock(unittest.TestCase): + @classmethod + @mongomock.patch(servers=(("localhost", 27017),)) + def setUpClass(cls): + logging.basicConfig(level=logging.INFO) + url = "mongodb://localhost" + abcd = ABCD.from_url(url) + abcd.print_info() + + cls.abcd = abcd + + @classmethod + def tearDownClass(cls): + cls.abcd.destroy() + + def test_thing(self): + print(self.abcd.info()) + + def test_push(self): + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + + self.abcd.destroy() + self.abcd.push(atoms) + new = list(self.abcd.get_atoms())[0] + + assert atoms == new + self.abcd.destroy() + + +if __name__ == "__main__": + unittest.main(verbosity=1, exit=False) diff --git a/tests/opensearch.py b/tests/opensearch.py index 05d3bd49..e7abc41f 100644 --- a/tests/opensearch.py +++ b/tests/opensearch.py @@ -1,6 +1,15 @@ +from io import StringIO +import logging +import os +from time import sleep import unittest + +from ase.atoms import Atoms +from ase.io import read +from opensearchpy.exceptions import ConnectionError + from abcd import ABCD -import logging +from abcd.backends.atoms_opensearch import AtomsModel, OpenSearchDatabase class OpenSearch(unittest.TestCase): @@ -11,13 +20,8 @@ class 
OpenSearch(unittest.TestCase): @classmethod def setUpClass(cls): """ - Set up database connection. + Set up OpenSearch database connection. """ - import os - from time import sleep - from abcd.backends.atoms_opensearch import OpenSearchDatabase - from opensearchpy.exceptions import ConnectionError - if os.getenv("GITHUB_ACTIONS") != "true": raise unittest.SkipTest("Only runs via GitHub Actions") cls.security_enabled = os.getenv("security_enabled") == "true" @@ -34,7 +38,7 @@ def setUpClass(cls): analyse_schema=False, use_ssl=cls.security_enabled, ) - except ConnectionError or ConnectionResetError: + except (ConnectionError, ConnectionResetError): sleep(10) abcd = ABCD.from_url( url, @@ -49,18 +53,14 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): """ - Delete index from database. + Delete index from OpenSearch database. """ cls.abcd.destroy() def push_data(self): """ - Uploads an example xyz file to the database. + Helper function to upload an example xyz file to the database. """ - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - xyz = StringIO( """2 Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" @@ -105,7 +105,6 @@ def test_destroy(self): self.abcd.destroy() self.assertFalse(self.abcd.client.indices.exists("test_index")) - return def test_create(self): """ @@ -121,11 +120,6 @@ def test_push(self): """ Test pushing atoms objects to database individually. """ - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - from abcd.backends.atoms_opensearch import AtomsModel - self.abcd.destroy() self.abcd.create() xyz_1 = StringIO( @@ -172,17 +166,11 @@ def test_delete(self): self.assertTrue(self.abcd.client.indices.exists("test_index")) self.abcd.refresh() self.assertEqual(self.abcd.count(), 0) - return def test_bulk(self): """ Test pushing atoms object to database together. 
""" - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - from abcd.backends.atoms_opensearch import AtomsModel - self.abcd.destroy() self.abcd.create() xyz_1 = StringIO( @@ -240,10 +228,6 @@ def test_property(self): """ Test getting values of a property from the database. """ - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - self.abcd.destroy() self.abcd.create() @@ -305,10 +289,6 @@ def test_count_property(self): """ Test counting values of specified properties from the database. """ - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - self.abcd.destroy() self.abcd.create() @@ -346,10 +326,6 @@ def test_count_properties(self): """ Test counting appearences of each property in documents in the database. """ - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - self.abcd.destroy() self.abcd.create() @@ -548,8 +524,6 @@ def test_get_atoms(self): """ Test getting values from documents in the database as Atoms objects. """ - from ase.atoms import Atoms - self.abcd.destroy() self.abcd.create() self.push_data() @@ -560,10 +534,6 @@ def test_query(self): """ Test querying documents in the database. 
""" - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - self.abcd.destroy() self.abcd.create() diff --git a/tests/database.py b/tests/opensearch_mock.py similarity index 74% rename from tests/database.py rename to tests/opensearch_mock.py index 92e9fb92..6f0959c6 100644 --- a/tests/database.py +++ b/tests/opensearch_mock.py @@ -1,55 +1,19 @@ +from importlib import reload +from io import StringIO +import logging +import os import unittest -import mongomock + +from ase.atoms import Atoms +from ase.io import read from openmock import openmock from abcd import ABCD -import logging - - -class Mongo(unittest.TestCase): - @classmethod - @mongomock.patch(servers=(("localhost", 27017),)) - def setUpClass(cls): - logging.basicConfig(level=logging.INFO) - url = "mongodb://localhost" - abcd = ABCD.from_url(url) - abcd.print_info() - - cls.abcd = abcd +from abcd.backends import atoms_opensearch +from abcd.backends.atoms_opensearch import AtomsModel - @classmethod - def tearDownClass(cls): - cls.abcd.destroy() - - def test_thing(self): - print(self.abcd.info()) - - def test_push(self): - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - xyz = StringIO( - """2 - Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" - Si 0.00000000 0.00000000 0.00000000 - Si 0.00000000 0.00000000 0.00000000 - """ - ) - - atoms = read(xyz, format="extxyz") - assert isinstance(atoms, Atoms) - atoms.set_cell([1, 1, 1]) - - self.abcd.destroy() - self.abcd.push(atoms) - new = list(self.abcd.get_atoms())[0] - - assert atoms == new - self.abcd.destroy() - - -class OpenSearch(unittest.TestCase): +class OpenSearchMock(unittest.TestCase): """ Testing mock OpenSearch database functions. """ @@ -60,10 +24,18 @@ def setUpClass(cls): """ Set up database connection. 
""" + reload(atoms_opensearch) from abcd.backends.atoms_opensearch import OpenSearchDatabase + if "port" in os.environ: + cls.port = int(os.environ["port"]) + else: + cls.port = 9200 + cls.host = "localhost" + logging.basicConfig(level=logging.INFO) - url = "opensearch://admin:admin@localhost:9200" + + url = f"opensearch://admin:admin@{cls.host}:{cls.port}" abcd = ABCD.from_url(url, index_name="test_index", analyse_schema=False) assert isinstance(abcd, OpenSearchDatabase) cls.abcd = abcd @@ -82,7 +54,6 @@ def test_destroy(self): self.assertTrue(self.abcd.client.indices.exists("test_index")) self.abcd.destroy() self.assertFalse(self.abcd.client.indices.exists("test_index")) - return def test_create(self): """ @@ -97,11 +68,6 @@ def test_push(self): """ Test pushing atoms objects to database individually. """ - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - from abcd.backends.atoms_opensearch import AtomsModel - self.abcd.destroy() self.abcd.create() xyz_1 = StringIO( @@ -139,11 +105,6 @@ def test_bulk(self): """ Test pushing atoms object to database together. """ - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - from abcd.backends.atoms_opensearch import AtomsModel - self.abcd.destroy() self.abcd.create() xyz_1 = StringIO( @@ -190,10 +151,6 @@ def test_count(self): """ Test counting the number of documents in the database. """ - from io import StringIO - from ase.io import read - from ase.atoms import Atoms - self.abcd.destroy() self.abcd.create() xyz = StringIO( diff --git a/tests/properties.py b/tests/properties.py index 1afab1b7..76177cf9 100644 --- a/tests/properties.py +++ b/tests/properties.py @@ -1,4 +1,8 @@ +import os import unittest + +from pandas import DataFrame + from abcd.backends.atoms_properties import Properties @@ -10,18 +14,14 @@ def setUpClass(cls): """ Load example data file. 
""" - import os - class_path = os.path.normpath(os.path.abspath(__file__)) - data_file = os.path.dirname(class_path) + "/examples.csv" + data_file = os.path.dirname(class_path) + "/data/examples.csv" cls.property = Properties(data_file) def test_dataframe(self): """ Test data correctly stored in pandas DataFrame. """ - from pandas import DataFrame - assert isinstance(self.property.df, DataFrame) assert len(self.property.df) == 3 From 94aa1a9e809342183e06411005884edcb762b7b2 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 22 Jan 2024 15:16:05 +0000 Subject: [PATCH 049/112] Add option to disabled SSL --- abcd/frontends/commandline/commands.py | 5 +++-- abcd/frontends/commandline/decorators.py | 7 ++++++- abcd/frontends/commandline/parser.py | 5 +++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index 5d7d7015..c237947a 100644 --- a/abcd/frontends/commandline/commands.py +++ b/abcd/frontends/commandline/commands.py @@ -9,16 +9,17 @@ @init_config -def login(*, config, name, url, **kwargs): +def login(*, config, name, url, disable_ssl=False, **kwargs): logger.info( f"login args: \nconfig:{config}, name:{name}, url:{url}, kwargs:{kwargs}" ) from abcd import ABCD - db = ABCD.from_url(url=url) + db = ABCD.from_url(url=url, use_ssl=(not disable_ssl)) info = db.info() config["url"] = url + config["use_ssl"] = not disable_ssl config.save() print("Successfully connected to the database!") diff --git a/abcd/frontends/commandline/decorators.py b/abcd/frontends/commandline/decorators.py index 201f6000..ce509004 100644 --- a/abcd/frontends/commandline/decorators.py +++ b/abcd/frontends/commandline/decorators.py @@ -20,12 +20,17 @@ def init_db(func): @functools.wraps(func) def wrapper(*args, config, **kwargs): url = config.get("url", None) + use_ssl = config.get("use_ssl", None) if url is None: print("Please use abcd login first!") exit(1) - db = ABCD.from_url(url=url) + if 
use_ssl is None: + print("use_ssl has not been saved. Please login again") + exit(1) + + db = ABCD.from_url(url=url, use_ssl=use_ssl) # TODO: AST.from_string() ?! # TODO: better ast optimisation diff --git a/abcd/frontends/commandline/parser.py b/abcd/frontends/commandline/parser.py index 71aaca19..51b6dbda 100644 --- a/abcd/frontends/commandline/parser.py +++ b/abcd/frontends/commandline/parser.py @@ -37,6 +37,11 @@ help="url of abcd api (default: http://localhost)", default="http://localhost", ) +login_parser.add_argument( + "--disable_ssl", + action="store_true", + help="Disable SSL encryption", +) download_parser = subparsers.add_parser( "download", help="download data from the database" From 3cb7dedd479e947ffb6ca65f60f23b3cb2e9ca19 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 22 Jan 2024 15:16:42 +0000 Subject: [PATCH 050/112] Add initial CLI integration tests --- tests/cli.py | 41 +++++++++++++++++++++++++++++++++++++++++ tests/data/example.xyz | 4 ++++ 2 files changed, 45 insertions(+) create mode 100644 tests/cli.py create mode 100644 tests/data/example.xyz diff --git a/tests/cli.py b/tests/cli.py new file mode 100644 index 00000000..4149ecc8 --- /dev/null +++ b/tests/cli.py @@ -0,0 +1,41 @@ +import os +import unittest +import logging +from time import sleep + + +class CLI(unittest.TestCase): + """ + Testing OpenSearch database CLI integration. + """ + + @classmethod + def setUpClass(cls): + """ + Set up OpenSearch database connection and login with CLI. 
+ """ + if os.getenv("GITHUB_ACTIONS") != "true": + raise unittest.SkipTest("Only runs via GitHub Actions") + cls.security_enabled = os.getenv("security_enabled") == "true" + cls.port = int(os.environ["port"]) + cls.host = "localhost" + + logging.basicConfig(level=logging.INFO) + + url = f"opensearch://admin:admin@{cls.host}:{cls.port}" + if not cls.security_enabled: + url += " --disable_ssl" + try: + os.system(f"abcd login {url}") + except (ConnectionError, ConnectionResetError): + sleep(10) + os.system(f"abcd login {url}") + + def test_summary(self): + """ + Test summary output of stored data. + """ + class_path = os.path.normpath(os.path.abspath(__file__)) + data_file = os.path.dirname(class_path) + "/data/example.xyz" + os.system(f"abcd upload {data_file}") + os.system("abcd summary") diff --git a/tests/data/example.xyz b/tests/data/example.xyz new file mode 100644 index 00000000..452b9d67 --- /dev/null +++ b/tests/data/example.xyz @@ -0,0 +1,4 @@ +2 +Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" +Si 0.00000000 1.00000000 2.00000000 +Si 4.00000000 5.00000000 6.00000000 From 4938a1208f5164c94a8cbaea92da1de41a207b2b Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 22 Jan 2024 16:25:13 +0000 Subject: [PATCH 051/112] Catch process errors in CLI tests --- tests/cli.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/cli.py b/tests/cli.py index 4149ecc8..40820251 100644 --- a/tests/cli.py +++ b/tests/cli.py @@ -1,4 +1,5 @@ import os +import subprocess import unittest import logging from time import sleep @@ -26,16 +27,19 @@ def setUpClass(cls): if not cls.security_enabled: url += " --disable_ssl" try: - os.system(f"abcd login {url}") - except (ConnectionError, ConnectionResetError): + subprocess.run(f"abcd login {url}", shell=True, check=True) + except subprocess.CalledProcessError: sleep(10) - os.system(f"abcd login {url}") + subprocess.run(f"abcd login {url}", shell=True, check=True) def 
test_summary(self): """ - Test summary output of stored data. + Test summary output of uploaded data file. """ class_path = os.path.normpath(os.path.abspath(__file__)) data_file = os.path.dirname(class_path) + "/data/example.xyz" - os.system(f"abcd upload {data_file}") - os.system("abcd summary") + subprocess.run(f"abcd upload {data_file}", shell=True, check=True) + summary = subprocess.run( + "abcd summary", shell=True, check=True, capture_output=True, text=True + ) + assert "Total number of configurations: 1" in summary.stdout From 0f966a49517a22e4a3d01492b5fc514ef8248243 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 24 Jan 2024 15:32:15 +0000 Subject: [PATCH 052/112] Add chunk and timeout kwargs for bulk push --- abcd/backends/atoms_opensearch.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 1645f173..cea802cb 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -383,7 +383,7 @@ def refresh(self): """ self.client.indices.refresh(index=self.index_name) - def save_bulk(self, actions: Iterable): + def save_bulk(self, actions: Iterable, **kwargs): """ Save a collection of documents in bulk. @@ -392,13 +392,22 @@ def save_bulk(self, actions: Iterable): actions: Iterable Documents to be saved. """ - helpers.bulk(client=self.client, actions=actions, index=self.index_name) + request_timeout = kwargs.get("request_timeout", 30) + chunk_size = kwargs.get("chunk_size", 500) + helpers.bulk( + client=self.client, + actions=actions, + index=self.index_name, + chunk_size=chunk_size, + request_timeout=request_timeout, + ) def push( self, atoms: Union[Atoms, Iterable], extra_info: Union[dict, str, list, None] = None, store_calc: bool = True, + **kwargs, ): """ Save data from atoms object(s) to database. 
@@ -446,7 +455,7 @@ def push( ) actions.append(data.data) actions[-1]["derived"] = data.derived - self.save_bulk(actions) + self.save_bulk(actions, **kwargs) def upload( self, From 1e7dfd1f6275de815988bb315024d633ef2adc56 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 24 Jan 2024 16:14:54 +0000 Subject: [PATCH 053/112] Refactor backend code --- abcd/backends/atoms_opensearch.py | 212 +++++------------------------- abcd/backends/atoms_pymongo.py | 136 +------------------ abcd/backends/utils.py | 131 ++++++++++++++++++ 3 files changed, 171 insertions(+), 308 deletions(-) create mode 100644 abcd/backends/utils.py diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index cea802cb..de0ed534 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -1,30 +1,25 @@ from __future__ import annotations -import logging - from collections.abc import Generator +from datetime import datetime from typing import Union, Iterable +import logging from os import linesep -from datetime import datetime -from collections import Counter -from operator import itemgetter from pathlib import Path -import numpy as np - from ase import Atoms from ase.io import iread +from luqum.parser import parser +from luqum.elasticsearch import SchemaAnalyzer, ElasticsearchQueryBuilder +from opensearchpy import OpenSearch, helpers, AuthenticationException, ConnectionTimeout +from abcd.backends import utils +from abcd.database import AbstractABCD import abcd.errors from abcd.model import AbstractModel -from abcd.database import AbstractABCD -from abcd.queryset import AbstractQuerySet from abcd.parsers import extras +from abcd.queryset import AbstractQuerySet -from opensearchpy import OpenSearch, helpers, AuthenticationException, ConnectionTimeout - -from luqum.parser import parser -from luqum.elasticsearch import SchemaAnalyzer, ElasticsearchQueryBuilder logger = logging.getLogger(__name__) @@ -94,20 +89,19 @@ def __call__(self, query: 
Union[dict, str, list, None]) -> Union[dict, None]: Union[dict, None] The parsed query for OpenSearch. """ - logger.info("parsed query: {}".format(query)) - if not query: query = self.get_default_query() + logger.info("parsed query: %s", query) if isinstance(query, dict): return query - elif isinstance(query, str): + if isinstance(query, str): tree = parser.parse(query) return self.query_builder(tree) - elif isinstance(query, list): + if isinstance(query, list): if len(query) == 0: return None - elif query[0] is None: + if query[0] is None: return None separator = " AND " joined_query = separator.join(query) @@ -244,7 +238,7 @@ class OpenSearchDatabase(AbstractABCD): ---------- client: OpenSearch OpenSearch client. - db: str + db_name: str Database name. index_name: str OpenSearch index name. @@ -256,7 +250,7 @@ def __init__( self, host: str = "localhost", port: int = 9200, - db: str = "abcd", + db_name: str = "abcd", index_name: str = "atoms", username: str = "admin", password: str = "admin", @@ -272,7 +266,7 @@ def __init__( Name of OpenSearch host. Default is `localhost`. port: int, optional OpenSearch port. Default is `9200`. - db: str, optional + db_name: str, optional Label for OpenSearch database. Used only when printing information. Default is `abcd`. index_name: str, optional @@ -309,7 +303,7 @@ def __init__( try: info = self.client.info() - logger.info("DB info: {}".format(info)) + logger.info("DB info: %s", info) except AuthenticationException: raise abcd.errors.AuthenticationError() @@ -317,7 +311,7 @@ def __init__( except ConnectionTimeout: raise abcd.errors.TimeoutError() - self.db = db + self.db = db_name self.index_name = index_name self.create() self.parser = OpenSearchQuery(self.client, self.index_name, analyse_schema) @@ -558,8 +552,8 @@ def count(self, query: Union[dict, str, None] = None) -> int: ------- Count of number of documents. 
""" - logger.info("query; {}".format(query)) query = self.parser(query) + logger.info("parsed query: %s", query) body = { "query": query, } @@ -686,7 +680,7 @@ def properties(self, query: Union[dict, str, None] = None) -> dict: for label in derived: count = res["aggregations"][label]["doc_count"] if count > 0: - key = label.split("_")[0] + key = label.split("_", maxsplit=1)[0] if key in properties: properties[key].append(prop) else: @@ -710,8 +704,6 @@ def get_type_of_property(self, prop: str, category: str) -> str: ------- Type of the property. """ - # TODO: Probably it would be nicer to store the type info in the database - # from the beginning. atoms = self.client.search( index=self.index_name, body={"size": 1, "query": {"exists": {"field": prop}}}, @@ -720,23 +712,19 @@ def get_type_of_property(self, prop: str, category: str) -> str: data = atoms["hits"]["hits"][0]["_source"][prop] if category == "arrays": - if type(data[0]) == list: + if isinstance(data[0], list): return "array({}, N x {})".format( map_types[type(data[0][0])], len(data[0]) ) - else: - return "vector({}, N)".format(map_types[type(data[0])]) + return "vector({}, N)".format(map_types[type(data[0])]) - if type(data) == list: - if type(data[0]) == list: - if type(data[0][0]) == list: + if isinstance(data, list): + if isinstance(data[0], list): + if isinstance(data[0][0], list): return "list(list(...)" - else: - return "array({})".format(map_types[type(data[0][0])]) - else: - return "vector({})".format(map_types[type(data[0])]) - else: - return "scalar({})".format(map_types[type(data)]) + return "array({})".format(map_types[type(data[0][0])]) + return "vector({})".format(map_types[type(data[0])]) + return "scalar({})".format(map_types[type(data)]) def count_properties(self, query: Union[dict, str, None] = None) -> dict: """ @@ -790,8 +778,10 @@ def count_properties(self, query: Union[dict, str, None] = None) -> dict: if count > 0: properties[key] = { "count": count, - "category": 
label.split("_")[0], - "dtype": self.get_type_of_property(key, label.split("_")[0]), + "category": label.split("_", maxsplit=1)[0], + "dtype": self.get_type_of_property( + key, label.split("_", maxsplit=1)[0] + ), } return properties @@ -807,8 +797,8 @@ def add_property(self, data: dict, query: Union[dict, str, None] = None): query: Union[dict, str, None] Query to filter documents to add properties to. Default is `None`. """ - logger.info("add: data={}, query={}".format(data, query)) query = self.parser(query) + logger.info("add: data=%s, query=%s", data, query) script_txt = "ctx._source.derived.info_keys.addAll(params.keys);" for key, val in data.items(): @@ -843,8 +833,8 @@ def rename_property( query: Union[dict, str, None] Query to filter documents to rename property. Default is `None`. """ - logger.info("rename: query={}, old={}, new={}".format(query, name, new_name)) query = self.parser(query) + logger.info("rename: query=%s, old=%s, new=%s", query, name, new_name) script_txt = f"if (!ctx._source.containsKey('{new_name}')) {{ " script_txt += ( @@ -877,8 +867,8 @@ def delete_property(self, name: str, query: Union[dict, str, None] = None): query: Union[dict, str, None] Query to filter documents to have property deleted. Default is `None`. 
""" - logger.info("delete: query={}, porperty={}".format(name, query)) query = self.parser(query) + logger.info("delete: query=%s, porperty=%s", name, query) script_txt = f"if (ctx._source.containsKey('{name}')) {{ " script_txt += "ctx._source.remove(params.name);" @@ -920,7 +910,7 @@ def hist( query = self.parser(query) data = self.property(name, query) - return histogram(name, data, **kwargs) + return utils.histogram(name, data, **kwargs) def __repr__(self): """ @@ -974,138 +964,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): pass -def histogram(name, data, **kwargs): - if not data: - return None - - elif data and isinstance(data, list): - ptype = type(data[0]) - - if not all(isinstance(x, ptype) for x in data): - print("Mixed type error of the {} property!".format(name)) - return None - - if ptype == float: - bins = kwargs.get("bins", 10) - return _hist_float(name, data, bins) - - elif ptype == int: - bins = kwargs.get("bins", 10) - return _hist_int(name, data, bins) - - elif ptype == str: - return _hist_str(name, data, **kwargs) - - elif ptype == datetime: - bins = kwargs.get("bins", 10) - return _hist_date(name, data, bins) - - else: - print( - "{}: Histogram for list of {} types are not supported!".format( - name, type(data[0]) - ) - ) - logger.info( - "{}: Histogram for list of {} types are not supported!".format( - name, type(data[0]) - ) - ) - - else: - logger.info( - "{}: Histogram for {} types are not supported!".format(name, type(data)) - ) - return None - - -def _hist_float(name, data, bins=10): - data = np.array(data) - hist, bin_edges = np.histogram(data, bins=bins) - - return { - "type": "hist_float", - "name": name, - "bins": bins, - "edges": bin_edges, - "counts": hist, - "min": data.min(), - "max": data.max(), - "median": data.mean(), - "std": data.std(), - "var": data.var(), - } - - -def _hist_date(name, data, bins=10): - hist_data = np.array([t.timestamp() for t in data]) - hist, bin_edges = np.histogram(hist_data, bins=bins) - - 
fromtimestamp = datetime.fromtimestamp - - return { - "type": "hist_date", - "name": name, - "bins": bins, - "edges": [fromtimestamp(d) for d in bin_edges], - "counts": hist, - "min": fromtimestamp(hist_data.min()), - "max": fromtimestamp(hist_data.max()), - "median": fromtimestamp(hist_data.mean()), - "std": fromtimestamp(hist_data.std()), - "var": fromtimestamp(hist_data.var()), - } - - -def _hist_int(name, data, bins=10): - data = np.array(data) - delta = max(data) - min(data) + 1 - - if bins > delta: - bins = delta - - hist, bin_edges = np.histogram(data, bins=bins) - - return { - "type": "hist_int", - "name": name, - "bins": bins, - "edges": bin_edges, - "counts": hist, - "min": data.min(), - "max": data.max(), - "median": data.mean(), - "std": data.std(), - "var": data.var(), - } - - -def _hist_str(name, data, bins=10, truncate=20): - n_unique = len(set(data)) - - if truncate: - # data = (item[:truncate] for item in data) - data = ( - item[:truncate] + "..." if len(item) > truncate else item for item in data - ) - - data = Counter(data) - - if bins: - labels, counts = zip(*sorted(data.items(), key=itemgetter(1, 0), reverse=True)) - else: - labels, counts = zip(*data.items()) - - return { - "type": "hist_str", - "name": name, - "total": sum(data.values()), - "unique": n_unique, - "labels": labels[:bins], - "counts": counts[:bins], - } - - if __name__ == "__main__": db = OpenSearchDatabase(username="admin", password="admin") print(db.info()) diff --git a/abcd/backends/atoms_pymongo.py b/abcd/backends/atoms_pymongo.py index a34cc7d3..c13edc6e 100644 --- a/abcd/backends/atoms_pymongo.py +++ b/abcd/backends/atoms_pymongo.py @@ -1,8 +1,6 @@ -from collections import Counter from collections.abc import Iterable from datetime import datetime import logging -from operator import itemgetter from os import linesep from pathlib import Path import types @@ -11,10 +9,10 @@ from ase import Atoms from ase.io import iread from bson import ObjectId -import numpy as np from 
pymongo import MongoClient import pymongo.errors +from abcd.backends import utils from abcd.database import AbstractABCD import abcd.errors from abcd.model import AbstractModel @@ -133,14 +131,13 @@ def __call__(self, ast): p = parser(ast) return self.visit(p) - elif isinstance(ast, list): + if isinstance(ast, list): from abcd.parsers.queries import parser if len(ast) == 0: return {} - else: - ast = ("AND", *[parser(q) for q in ast]) - return self.visit(ast) + ast = ("AND", *[parser(q) for q in ast]) + return self.visit(ast) return self.visit(ast) if ast else {} @@ -433,7 +430,7 @@ def delete_property(self, name, query=None): def hist(self, name, query=None, **kwargs): data = self.property(name, query) - return histogram(name, data, **kwargs) + return utils.histogram(name, data, **kwargs) def exec(self, code, query=None): # TODO: Separate python environment with its own packages loaded @@ -476,129 +473,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): pass -def histogram(name, data, **kwargs): - if not data: - return None - - if data and isinstance(data, list): - ptype = type(data[0]) - - if not all(isinstance(x, ptype) for x in data): - print(f"Mixed type error of the {name} property!") - return None - - if isinstance(data[0], float): - bins = kwargs.get("bins", 10) - return _hist_float(name, data, bins) - - if isinstance(data[0], int): - bins = kwargs.get("bins", 10) - return _hist_int(name, data, bins) - - if isinstance(data[0], str): - return _hist_str(name, data, **kwargs) - - if isinstance(data[0], datetime): - bins = kwargs.get("bins", 10) - return _hist_date(name, data, bins) - - print(f"{name}: Histogram for list of {type(data[0])} types are not supported!") - logger.info( - f"{name}: Histogram for list of {type(data[0])} types are not supported!" 
- ) - return None - - logger.info(f"{name}: Histogram for {type(data)} types are not supported!") - return None - - -def _hist_float(name, data, bins=10): - data = np.array(data) - hist, bin_edges = np.histogram(data, bins=bins) - - return { - "type": "hist_float", - "name": name, - "bins": bins, - "edges": bin_edges, - "counts": hist, - "min": data.min(), - "max": data.max(), - "median": data.mean(), - "std": data.std(), - "var": data.var(), - } - - -def _hist_date(name, data, bins=10): - hist_data = np.array([t.timestamp() for t in data]) - hist, bin_edges = np.histogram(hist_data, bins=bins) - - fromtimestamp = datetime.fromtimestamp - - return { - "type": "hist_date", - "name": name, - "bins": bins, - "edges": [fromtimestamp(d) for d in bin_edges], - "counts": hist, - "min": fromtimestamp(hist_data.min()), - "max": fromtimestamp(hist_data.max()), - "median": fromtimestamp(hist_data.mean()), - "std": fromtimestamp(hist_data.std()), - "var": fromtimestamp(hist_data.var()), - } - - -def _hist_int(name, data, bins=10): - data = np.array(data) - delta = max(data) - min(data) + 1 - - if bins > delta: - bins = delta - - hist, bin_edges = np.histogram(data, bins=bins) - - return { - "type": "hist_int", - "name": name, - "bins": bins, - "edges": bin_edges, - "counts": hist, - "min": data.min(), - "max": data.max(), - "median": data.mean(), - "std": data.std(), - "var": data.var(), - } - - -def _hist_str(name, data, bins=10, truncate=20): - n_unique = len(set(data)) - - if truncate: - # data = (item[:truncate] for item in data) - data = ( - item[:truncate] + "..." 
if len(item) > truncate else item for item in data - ) - - data = Counter(data) - - if bins: - labels, counts = zip(*sorted(data.items(), key=itemgetter(1, 0), reverse=True)) - else: - labels, counts = zip(*data.items()) - - return { - "type": "hist_str", - "name": name, - "total": sum(data.values()), - "unique": n_unique, - "labels": labels[:bins], - "counts": counts[:bins], - } - - if __name__ == "__main__": db = MongoDatabase(username="mongoadmin", password="secret") print(db.info()) diff --git a/abcd/backends/utils.py b/abcd/backends/utils.py new file mode 100644 index 00000000..dd6f18aa --- /dev/null +++ b/abcd/backends/utils.py @@ -0,0 +1,131 @@ +from collections import Counter +from datetime import datetime +import logging +from operator import itemgetter + +import numpy as np + +logger = logging.getLogger(__name__) + + +def histogram(name, data, **kwargs): + if not data: + return None + + if data and isinstance(data, list): + ptype = type(data[0]) + + if not all(isinstance(x, ptype) for x in data): + print("Mixed type error of the %s property!", name) + return None + + if ptype == float: + bins = kwargs.get("bins", 10) + return _hist_float(name, data, bins) + + if ptype == int: + bins = kwargs.get("bins", 10) + return _hist_int(name, data, bins) + + if ptype == str: + return _hist_str(name, data, **kwargs) + + if ptype == datetime: + bins = kwargs.get("bins", 10) + return _hist_date(name, data, bins) + + print( + "%s: Histogram for list of %s types are not supported!", name, type(data[0]) + ) + logger.info( + "%s: Histogram for list of %s types are not supported!", name, type(data[0]) + ) + + logger.info("%s: Histogram for %s types are not supported!", name, type(data)) + return None + + +def _hist_float(name, data, bins=10): + data = np.array(data) + hist, bin_edges = np.histogram(data, bins=bins) + + return { + "type": "hist_float", + "name": name, + "bins": bins, + "edges": bin_edges, + "counts": hist, + "min": data.min(), + "max": data.max(), + 
"median": data.mean(), + "std": data.std(), + "var": data.var(), + } + + +def _hist_date(name, data, bins=10): + hist_data = np.array([t.timestamp() for t in data]) + hist, bin_edges = np.histogram(hist_data, bins=bins) + + fromtimestamp = datetime.fromtimestamp + + return { + "type": "hist_date", + "name": name, + "bins": bins, + "edges": [fromtimestamp(d) for d in bin_edges], + "counts": hist, + "min": fromtimestamp(hist_data.min()), + "max": fromtimestamp(hist_data.max()), + "median": fromtimestamp(hist_data.mean()), + "std": fromtimestamp(hist_data.std()), + "var": fromtimestamp(hist_data.var()), + } + + +def _hist_int(name, data, bins=10): + data = np.array(data) + delta = max(data) - min(data) + 1 + + bins = min(bins, delta) + + hist, bin_edges = np.histogram(data, bins=bins) + + return { + "type": "hist_int", + "name": name, + "bins": bins, + "edges": bin_edges, + "counts": hist, + "min": data.min(), + "max": data.max(), + "median": data.mean(), + "std": data.std(), + "var": data.var(), + } + + +def _hist_str(name, data, bins=10, truncate=20): + n_unique = len(set(data)) + + if truncate: + # data = (item[:truncate] for item in data) + data = ( + item[:truncate] + "..." 
if len(item) > truncate else item for item in data + ) + + data = Counter(data) + + if bins: + labels, counts = zip(*sorted(data.items(), key=itemgetter(1, 0), reverse=True)) + else: + labels, counts = zip(*data.items()) + + return { + "type": "hist_str", + "name": name, + "total": sum(data.values()), + "unique": n_unique, + "labels": labels[:bins], + "counts": counts[:bins], + } From 3581141222e954dd6a578450c120c2cfb9d13539 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 11 Mar 2024 16:55:07 +0000 Subject: [PATCH 054/112] Update minimum openserch for security --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 614312f3..059a979b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ matplotlib = "^3.9" notebook = "^7.2" numpy = "^1.26" openpyxl = "^3.1.2" -opensearch-py = "^2.2.0" +opensearch-py = "^2.4.0" pandas = "^2.2" pymongo = "^4.7.3" python = "^3.9" From e3c7ae16508155a9432d6aecc6191111b7a3eda4 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 11 Mar 2024 17:04:05 +0000 Subject: [PATCH 055/112] Update admin default password in CI --- .ci/opensearch/action.yml | 5 +++-- .ci/opensearch/run-opensearch.sh | 9 ++++++++- .github/workflows/ci.yml | 1 + tests/cli.py | 6 +++++- tests/opensearch.py | 6 +++++- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/.ci/opensearch/action.yml b/.ci/opensearch/action.yml index bb516d35..f917a612 100644 --- a/.ci/opensearch/action.yml +++ b/.ci/opensearch/action.yml @@ -17,10 +17,10 @@ inputs: description: 'Port where you want to run OpenSearch' required: false default: 9200 - opensearch_password: + opensearch-initial-admin-password: description: 'The password for the user admin in your cluster' required: false - default: 'admin' + default: 'myStrongPassword123!' 
runs: using: 'docker' @@ -30,3 +30,4 @@ runs: NODES: ${{ inputs.nodes }} PORT: ${{ inputs.port }} SECURITY_ENABLED: ${{ inputs.security-enabled }} + OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${{ inputs.opensearch-initial-admin-password }} diff --git a/.ci/opensearch/run-opensearch.sh b/.ci/opensearch/run-opensearch.sh index 22adc630..4f3678b4 100755 --- a/.ci/opensearch/run-opensearch.sh +++ b/.ci/opensearch/run-opensearch.sh @@ -7,12 +7,18 @@ if [[ -z $OPENSEARCH_VERSION ]]; then exit 1 fi +OPENSEARCH_REQUIRED_VERSION="latest" +# Starting in 2.12.0, security demo configuration script requires an initial admin password +if [ "$OPENSEARCH_VERSION" != "$OPENSEARCH_REQUIRED_VERSION" ]; then + OPENSEARCH_INITIAL_ADMIN_PASSWORD="admin" +fi + for (( node=1; node<=${NODES-1}; node++ )) do port=$((PORT + $node - 1)) if [[ "$SECURITY_ENABLED" == "true" ]]; then - healthcmd="curl -vvv -s --insecure -u admin:admin --fail https://localhost:$port/_cluster/health || exit 1" + healthcmd="curl -vvv -s --insecure -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD --fail https://localhost:$port/_cluster/health || exit 1" security=($(cat <<-END END @@ -34,6 +40,7 @@ END --env discovery.type=single-node \ --env bootstrap.memory_lock=true \ --env "OPENSEARCH_JAVA_OPTS=-Xms4g -Xmx4g" \ + --env OPENSEARCH_INITIAL_ADMIN_PASSWORD=$OPENSEARCH_INITIAL_ADMIN_PASSWORD \ "${security[@]}" \ --publish "${port}:${port}" \ --ulimit nofile=65536:65536 \ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 46043a2d..02abfa34 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,6 +49,7 @@ jobs: env: port: 9250 security_enabled: ${{ matrix.security-enabled }} + opensearch-version: ${{ matrix.opensearch }} - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v4.4.1 diff --git a/tests/cli.py b/tests/cli.py index 40820251..b534f782 100644 --- a/tests/cli.py +++ b/tests/cli.py @@ -20,10 +20,14 @@ def setUpClass(cls): cls.security_enabled = 
os.getenv("security_enabled") == "true" cls.port = int(os.environ["port"]) cls.host = "localhost" + if os.environ["opensearch-version"] == "latest": + cls.credential = "admin:myStrongPassword123!" + else: + cls.credential = "admin:admin" logging.basicConfig(level=logging.INFO) - url = f"opensearch://admin:admin@{cls.host}:{cls.port}" + url = f"opensearch://{cls.credential}@{cls.host}:{cls.port}" if not cls.security_enabled: url += " --disable_ssl" try: diff --git a/tests/opensearch.py b/tests/opensearch.py index e7abc41f..1bb8eb3b 100644 --- a/tests/opensearch.py +++ b/tests/opensearch.py @@ -27,10 +27,14 @@ def setUpClass(cls): cls.security_enabled = os.getenv("security_enabled") == "true" cls.port = int(os.environ["port"]) cls.host = "localhost" + if os.environ["opensearch-version"] == "latest": + cls.credential = "admin:myStrongPassword123!" + else: + cls.credential = "admin:admin" logging.basicConfig(level=logging.INFO) - url = f"opensearch://admin:admin@{cls.host}:{cls.port}" + url = f"opensearch://{cls.credential}@{cls.host}:{cls.port}" try: abcd = ABCD.from_url( url, From 4cf383467406421ee70ccec8e872627e199fd98f Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Tue, 12 Mar 2024 14:53:32 +0000 Subject: [PATCH 056/112] Add query test --- tests/cli.py | 20 +++++++++++++++++++- tests/data/example_2.xyz | 5 +++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 tests/data/example_2.xyz diff --git a/tests/cli.py b/tests/cli.py index b534f782..c20e25b5 100644 --- a/tests/cli.py +++ b/tests/cli.py @@ -46,4 +46,22 @@ def test_summary(self): summary = subprocess.run( "abcd summary", shell=True, check=True, capture_output=True, text=True ) - assert "Total number of configurations: 1" in summary.stdout + assert "Total number of configurations:" in summary.stdout + + def test_query(self): + """ + Test lucene-style query. 
+ """ + class_path = os.path.normpath(os.path.abspath(__file__)) + data_file_1 = os.path.dirname(class_path) + "/data/example.xyz" + data_file_2 = os.path.dirname(class_path) + "/data/example_2.xyz" + subprocess.run(f"abcd upload {data_file_1}", shell=True, check=True) + subprocess.run(f"abcd upload {data_file_2}", shell=True, check=True) + summary = subprocess.run( + "abcd show -p n_atoms -q 'n_atoms : 2'", shell=True, check=True, capture_output=True, text=True + ) + assert "2" in summary.stdout and "3" not in summary.stdout + summary = subprocess.run( + "abcd show -p n_atoms -q 'n_atoms : 3'", shell=True, check=True, capture_output=True, text=True + ) + assert "3" in summary.stdout and "2" not in summary.stdout diff --git a/tests/data/example_2.xyz b/tests/data/example_2.xyz new file mode 100644 index 00000000..9582ab40 --- /dev/null +++ b/tests/data/example_2.xyz @@ -0,0 +1,5 @@ +3 +Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" +Si 0.00000000 1.00000000 2.00000000 +Si 3.00000000 4.00000000 5.00000000 +Si 6.00000000 7.00000000 7.00000000 From 33f267754f557de3e7c409257e16d3a349dbc3da Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Wed, 13 Mar 2024 10:31:27 +0000 Subject: [PATCH 057/112] Add OpenSearch refresh CLI command --- abcd/backends/atoms_opensearch.py | 2 +- abcd/frontends/commandline/commands.py | 6 ++++++ abcd/frontends/commandline/parser.py | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index de0ed534..65933a25 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -330,7 +330,7 @@ def info(self): else: host, port = None, None - self.client.indices.refresh(index=self.index_name) + self.refresh() return { "host": host, "port": port, diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index c237947a..4689ef5a 100644 --- a/abcd/frontends/commandline/commands.py 
+++ b/abcd/frontends/commandline/commands.py @@ -260,6 +260,12 @@ def server(*, abcd_url, url, api_only, **kwargs): app.run(host=o.hostname, port=o.port) +@init_config +@init_db +def refresh(*, db, **kwargs): + db.refresh() + + class Formater: def title(self, title): print("", title, "=" * len(title), sep=os.linesep) diff --git a/abcd/frontends/commandline/parser.py b/abcd/frontends/commandline/parser.py index 51b6dbda..b29e2677 100644 --- a/abcd/frontends/commandline/parser.py +++ b/abcd/frontends/commandline/parser.py @@ -204,6 +204,8 @@ "-u", "--url", help="Url to run the server.", default="http://localhost:5000" ) +refresh_parser = subparsers.add_parser("refresh", help="refresh database") +refresh_parser.set_defaults(callback_func=commands.refresh) def main(args=None): kwargs = parser.parse_args(args).__dict__ From 1a0364f2d7beed297308c855e687aeb123c5ee83 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Tue, 12 Mar 2024 17:56:06 +0000 Subject: [PATCH 058/112] Replace luqum with native query_string --- abcd/backends/atoms_opensearch.py | 82 ++++++++++--------------------- pyproject.toml | 1 - tests/cli.py | 69 +++++++++++++++++++++++--- tests/data/example.xyz | 2 +- tests/data/example_2.xyz | 2 +- tests/opensearch.py | 2 - 6 files changed, 91 insertions(+), 67 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 65933a25..e7721975 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -9,8 +9,6 @@ from ase import Atoms from ase.io import iread -from luqum.parser import parser -from luqum.elasticsearch import SchemaAnalyzer, ElasticsearchQueryBuilder from opensearchpy import OpenSearch, helpers, AuthenticationException, ConnectionTimeout from abcd.backends import utils @@ -34,55 +32,18 @@ class OpenSearchQuery(AbstractQuerySet): - """ - Class to parse and build queries for OpenSearch. 
- - Attributes - ---------- - query_builder: ElasticsearchQueryBuilder - Query builder to convert a Tree in an OpenSearch query. - """ - - def __init__( - self, - client: Union[OpenSearch, None] = None, - index_name: Union[str, None] = None, - analyse_schema: bool = False, - ): - """ " - Initialises class. - - Parameters - ---------- - client: Union[OpenSearch, None] - OpenSearch client, used for if analyse_schema is `True` to - characterise the schema. Default is `None`. - index_name: Union[str, None] - Name of OpenSearch index to be analysed, used if analyse_schema - is `True` to characterise the schema. Default is `None`. - analyse_schema: bool, optional - Whether to analyse the schema, as defined by the index_name and client. - Default is `False`. - """ - if analyse_schema and client is not None and index_name is not None: - schema = client.indices.get_mapping()[index_name] - schema_analizer = SchemaAnalyzer(schema) - self.query_builder = ElasticsearchQueryBuilder( - **schema_analizer.query_builder_options() - ) - else: - self.query_builder = ElasticsearchQueryBuilder() + """Class to parse and build queries for OpenSearch.""" def __call__(self, query: Union[dict, str, list, None]) -> Union[dict, None]: """ - Parses and builds queries from strings using ElasticsearchQueryBuilder. + Parses and builds queries for OpenSearch. Parameters ---------- query: Union[dict, str, list, None] - Query to be parsed for OpenSearch. If given as a dictionary, - the query is left unchanged. If given as a string, the - ElasticsearchQueryBuilder is used to build the query. + Query to be parsed for OpenSearch. If passed as a dictionary, the query is + left unchanged. If passed a string or list, the query is treated as a query + string, based on Lucene query syntax. 
Returns ------- @@ -91,13 +52,9 @@ def __call__(self, query: Union[dict, str, list, None]) -> Union[dict, None]: """ if not query: query = self.get_default_query() - logger.info("parsed query: %s", query) - if isinstance(query, dict): - return query if isinstance(query, str): - tree = parser.parse(query) - return self.query_builder(tree) + return self.build_query_string(query) if isinstance(query, list): if len(query) == 0: return None @@ -105,11 +62,28 @@ def __call__(self, query: Union[dict, str, list, None]) -> Union[dict, None]: return None separator = " AND " joined_query = separator.join(query) - tree = parser.parse(joined_query) - return self.query_builder(tree) + return self.build_query_string(joined_query) + logger.info("parsed query: %s", query) return query if query else None + @staticmethod + def build_query_string(query: str) -> dict: + """ + Build query_string (Lucene syntax) query. + + Parameters + ---------- + query : str + Query with Lucene syntax. + + Returns + ------- + dict + Parsed query for query_string query. + """ + return {"query_string": {"query": query}} + @staticmethod def get_default_query() -> dict: """ @@ -254,7 +228,6 @@ def __init__( index_name: str = "atoms", username: str = "admin", password: str = "admin", - analyse_schema: bool = True, **kwargs, ): """ @@ -275,9 +248,6 @@ def __init__( OpenSearch username. Default is `admin`. password: str, optional OpenSearch password. Default is `admin`. - analyse_schema: bool, optional - Whether to analyse the OpenSearch schema when building queries. - Default is `True`. 
""" super().__init__() @@ -314,7 +284,7 @@ def __init__( self.db = db_name self.index_name = index_name self.create() - self.parser = OpenSearchQuery(self.client, self.index_name, analyse_schema) + self.parser = OpenSearchQuery() def info(self): """ diff --git a/pyproject.toml b/pyproject.toml index 059a979b..e37280c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ documentation = "https://libatoms.github.io/abcd/" ase = "^3.23" chardet = "^5.2.0" lark = "^1.1.9" -luqum = "^0.13.0" matplotlib = "^3.9" notebook = "^7.2" numpy = "^1.26" diff --git a/tests/cli.py b/tests/cli.py index c20e25b5..f52a8ea1 100644 --- a/tests/cli.py +++ b/tests/cli.py @@ -42,11 +42,17 @@ def test_summary(self): """ class_path = os.path.normpath(os.path.abspath(__file__)) data_file = os.path.dirname(class_path) + "/data/example.xyz" - subprocess.run(f"abcd upload {data_file}", shell=True, check=True) + + subprocess.run( + f"abcd upload {data_file} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run(f"abcd refresh", shell=True, check=True) + summary = subprocess.run( "abcd summary", shell=True, check=True, capture_output=True, text=True ) - assert "Total number of configurations:" in summary.stdout + assert "Total number of configurations" in summary.stdout + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) def test_query(self): """ @@ -55,13 +61,64 @@ def test_query(self): class_path = os.path.normpath(os.path.abspath(__file__)) data_file_1 = os.path.dirname(class_path) + "/data/example.xyz" data_file_2 = os.path.dirname(class_path) + "/data/example_2.xyz" - subprocess.run(f"abcd upload {data_file_1}", shell=True, check=True) - subprocess.run(f"abcd upload {data_file_2}", shell=True, check=True) + + subprocess.run( + f"abcd upload {data_file_1} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run( + f"abcd upload {data_file_2} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run(f"abcd refresh", shell=True, check=True) + 
summary = subprocess.run( - "abcd show -p n_atoms -q 'n_atoms : 2'", shell=True, check=True, capture_output=True, text=True + "abcd show -p n_atoms -q 'n_atoms : 2'", + shell=True, + check=True, + capture_output=True, + text=True, ) assert "2" in summary.stdout and "3" not in summary.stdout summary = subprocess.run( - "abcd show -p n_atoms -q 'n_atoms : 3'", shell=True, check=True, capture_output=True, text=True + "abcd show -p n_atoms -q 'n_atoms : 3'", + shell=True, + check=True, + capture_output=True, + text=True, ) assert "3" in summary.stdout and "2" not in summary.stdout + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + + def test_range_query(self): + """ + Test lucene-style ranged query. + """ + class_path = os.path.normpath(os.path.abspath(__file__)) + data_file_1 = os.path.dirname(class_path) + "/data/example.xyz" + data_file_2 = os.path.dirname(class_path) + "/data/example_2.xyz" + + subprocess.run( + f"abcd upload {data_file_1} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run( + f"abcd upload {data_file_2} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run(f"abcd refresh", shell=True, check=True) + + summary = subprocess.run( + "abcd summary -p energy -q 'energy:[-100 TO -99]'", + shell=True, + check=True, + capture_output=True, + text=True, + ) + assert "Total number of configurations: 1" in summary.stdout + + summary = subprocess.run( + "abcd summary -p energy -q 'energy:[-102 TO -99]'", + shell=True, + check=True, + capture_output=True, + text=True, + ) + assert "Total number of configurations: 2" in summary.stdout + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) diff --git a/tests/data/example.xyz b/tests/data/example.xyz index 452b9d67..2a81c26d 100644 --- a/tests/data/example.xyz +++ b/tests/data/example.xyz @@ -1,4 +1,4 @@ 2 -Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" +Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" energy=-101.9 Si 
0.00000000 1.00000000 2.00000000 Si 4.00000000 5.00000000 6.00000000 diff --git a/tests/data/example_2.xyz b/tests/data/example_2.xyz index 9582ab40..13315d57 100644 --- a/tests/data/example_2.xyz +++ b/tests/data/example_2.xyz @@ -1,5 +1,5 @@ 3 -Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" +Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" energy=-99.33 Si 0.00000000 1.00000000 2.00000000 Si 3.00000000 4.00000000 5.00000000 Si 6.00000000 7.00000000 7.00000000 diff --git a/tests/opensearch.py b/tests/opensearch.py index 1bb8eb3b..259536dc 100644 --- a/tests/opensearch.py +++ b/tests/opensearch.py @@ -39,7 +39,6 @@ def setUpClass(cls): abcd = ABCD.from_url( url, index_name="test_index", - analyse_schema=False, use_ssl=cls.security_enabled, ) except (ConnectionError, ConnectionResetError): @@ -47,7 +46,6 @@ def setUpClass(cls): abcd = ABCD.from_url( url, index_name="test_index", - analyse_schema=False, use_ssl=cls.security_enabled, ) From 278cc3f40d45743bb8efaad3565497ae1a372bff Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Tue, 23 Apr 2024 17:38:12 +0000 Subject: [PATCH 059/112] Speed up property function --- abcd/backends/atoms_opensearch.py | 50 +++++++++++++++++++++---------- tests/opensearch.py | 13 +++++--- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index e7721975..a4eb752a 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -9,7 +9,13 @@ from ase import Atoms from ase.io import iread -from opensearchpy import OpenSearch, helpers, AuthenticationException, ConnectionTimeout +from opensearchpy import ( + OpenSearch, + helpers, + AuthenticationException, + ConnectionTimeout, + RequestError, +) from abcd.backends import utils from abcd.database import AbstractABCD @@ -533,8 +539,7 @@ def count(self, query: Union[dict, str, None] = None) -> int: def property(self, name, query: 
Union[dict, str, None] = None) -> list: """ Gets all values of a specified property for matching documents in the - database. This method is very slow, so it is preferable to use - alternative methods where possible, such as count_property. + database. Alternative methods, such as count_property, may be faster. Parameters ---------- @@ -543,24 +548,39 @@ def property(self, name, query: Union[dict, str, None] = None) -> list: Returns ------- - List of values for the specified property for all matching documents. + list + List of values for the specified property for all matching documents. """ query = self.parser(query) query = { "query": query, } - return [ - hit["_source"][format(name)] - for hit in helpers.scan( - self.client, - index=self.index_name, - query=query, - stored_fields=format(name), - _source=format(name), - ) - if format(name) in hit["_source"] - ] + try: + return [ + hit["fields"][format(name)][0] + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + _source=False, + stored_fields="_none_", + docvalue_fields=[format(name)], + ) + if "fields" in hit and format(name) in hit["fields"] + ] + except RequestError: + return [ + hit["_source"][format(name)] + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + stored_fields=format(name), + _source=format(name), + ) + if format(name) in hit["_source"] + ] def count_property(self, name, query: Union[dict, str, None] = None) -> dict: """ diff --git a/tests/opensearch.py b/tests/opensearch.py index 259536dc..23a56dd4 100644 --- a/tests/opensearch.py +++ b/tests/opensearch.py @@ -235,7 +235,7 @@ def test_property(self): xyz_1 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" prop_1="test_1" pbc="F F F" + Properties=species:S:1:pos:R:3 energy=-5.0 prop_1="test_1" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ @@ -244,11 +244,11 @@ def test_property(self): atoms_1 = read(xyz_1, format="extxyz") assert 
isinstance(atoms_1, Atoms) atoms_1.set_cell([1, 1, 1]) - self.abcd.push(atoms_1) + self.abcd.push(atoms_1, store_calc=False) xyz_2 = StringIO( """2 - Properties=species:S:1:pos:R:3 s="sadf" prop_2="test_2" pbc="F F F" + Properties=species:S:1:pos:R:3 energy=-10.0 prop_2="test_2" Si 0.00000000 0.00000000 0.00000000 Si 0.00000000 0.00000000 0.00000000 """ @@ -257,13 +257,18 @@ def test_property(self): atoms_2 = read(xyz_2, format="extxyz") assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) - self.abcd.push(atoms_2) + self.abcd.push(atoms_2, store_calc=False) self.abcd.refresh() prop = self.abcd.property("prop_1") expected_prop = ["test_1"] self.assertEqual(prop, expected_prop) + prop = self.abcd.property("energy") + expected_prop = [-5.0, -10.0] + self.assertEqual(prop[0], expected_prop[0]) + self.assertEqual(prop[1], expected_prop[1]) + def test_properties(self): """ Test getting all properties from the database. From 30ad1754cb1415f6c8d400d29512e2ebbf04d9b2 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 29 Apr 2024 13:50:58 +0000 Subject: [PATCH 060/112] Apply black formatter --- abcd/frontends/commandline/parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/abcd/frontends/commandline/parser.py b/abcd/frontends/commandline/parser.py index b29e2677..a9d7b2ca 100644 --- a/abcd/frontends/commandline/parser.py +++ b/abcd/frontends/commandline/parser.py @@ -207,6 +207,7 @@ refresh_parser = subparsers.add_parser("refresh", help="refresh database") refresh_parser.set_defaults(callback_func=commands.refresh) + def main(args=None): kwargs = parser.parse_args(args).__dict__ From a6f434aeb67a825083a116125c97bfb48cf747c9 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 29 Apr 2024 13:52:15 +0000 Subject: [PATCH 061/112] Add timeout to count --- abcd/backends/atoms_opensearch.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index a4eb752a..433a74ce 
100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -515,7 +515,7 @@ def get_atoms( ): yield AtomsModel(None, None, hit["_source"]).to_ase() - def count(self, query: Union[dict, str, None] = None) -> int: + def count(self, query: Union[dict, str, None] = None, timeout=30.0) -> int: """ Counts number of documents in the database. @@ -523,6 +523,8 @@ def count(self, query: Union[dict, str, None] = None) -> int: ---------- query: Union[dict, str, None] Query to filter documents to be counted. Default is `None`. + timeout: float + Timeout for request in seconds. Returns ------- @@ -534,7 +536,9 @@ def count(self, query: Union[dict, str, None] = None) -> int: "query": query, } - return self.client.count(index=self.index_name, body=body)["count"] + return self.client.count(index=self.index_name, body=body, timeout=timeout)[ + "count" + ] def property(self, name, query: Union[dict, str, None] = None) -> list: """ From d587cc06ca2208822ae83cc6d531f8262cfd53b4 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 29 Apr 2024 13:52:38 +0000 Subject: [PATCH 062/112] Add comment for property function --- abcd/backends/atoms_opensearch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 433a74ce..229fad16 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -560,6 +560,8 @@ def property(self, name, query: Union[dict, str, None] = None) -> list: "query": query, } + # Try to use docvalue_fields to avoid loading entire document + # But not all datatypes supported by default try: return [ hit["fields"][format(name)][0] From 478a8a4be1a2a7b21b0cdb9870a843ed2e333749 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 29 Apr 2024 13:53:37 +0000 Subject: [PATCH 063/112] Fix deleting keys via CLI --- abcd/frontends/commandline/commands.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git 
a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index 4689ef5a..56361687 100644 --- a/abcd/frontends/commandline/commands.py +++ b/abcd/frontends/commandline/commands.py @@ -4,6 +4,7 @@ import numpy as np from abcd.frontends.commandline.decorators import check_remote, init_config, init_db +from abcd.backends.atoms_opensearch import OpenSearchDatabase logger = logging.getLogger(__name__) @@ -215,7 +216,13 @@ def key_delete(*, db, query, yes, keys, **kwargs): keys = " ".join(keys) data = parser.parse(keys) - query = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) + if isinstance(db, OpenSearchDatabase): + query = [ + f"{query} AND ({' OR '.join([f'{key}:*' for key in data.keys()])})" + for query in query + ] + else: + query = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) if not yes: print( @@ -224,7 +231,7 @@ def key_delete(*, db, query, yes, keys, **kwargs): ) exit(1) - for k in keys: + for k in data.keys(): db.delete_property(k, query=query) From f677822a0d14d1df45402e458ab0ef53ea43cdd9 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 29 Apr 2024 15:04:22 +0000 Subject: [PATCH 064/112] Fix adding keys via CLI --- abcd/frontends/commandline/commands.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index 56361687..faa7233e 100644 --- a/abcd/frontends/commandline/commands.py +++ b/abcd/frontends/commandline/commands.py @@ -179,9 +179,18 @@ def key_add(*, db, query, keys, **kwargs): data = parser.parse(keys) if query: - test = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) + if isinstance(db, OpenSearchDatabase): + test = [ + f"{query} AND ({' OR '.join([f'{key}:*' for key in data.keys()])})" + for query in query + ] + else: + test = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) else: - test = ("OR", *(("NAME", key) for key in data.keys())) + if 
isinstance(db, OpenSearchDatabase): + test = ' OR '.join([f"{key}:*" for key in data.keys()]) + else: + test = ("OR", *(("NAME", key) for key in data.keys())) if db.count(query=test): print( From 33dc0bc245ddf579e01340ff3dd2761166488083 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Mon, 29 Apr 2024 15:23:43 +0000 Subject: [PATCH 065/112] Fix renaming properties --- abcd/backends/atoms_opensearch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 229fad16..8e66bfd5 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -832,10 +832,10 @@ def rename_property( query = self.parser(query) logger.info("rename: query=%s, old=%s, new=%s", query, name, new_name) - script_txt = f"if (!ctx._source.containsKey('{new_name}')) {{ " + script_txt = f"if (!ctx._source.containsKey(params.new_name)) {{ " script_txt += ( f"ctx._source.{new_name} = ctx._source.{name};" - " ctx._source.remove('params.name');" + " ctx._source.remove(params.name);" " for (int i=0; i Date: Mon, 29 Apr 2024 15:34:22 +0000 Subject: [PATCH 066/112] Add notebook with example queries --- tutorials/abcd_opensearch_queries.ipynb | 1143 +++++++++++++++++++++++ 1 file changed, 1143 insertions(+) create mode 100644 tutorials/abcd_opensearch_queries.ipynb diff --git a/tutorials/abcd_opensearch_queries.ipynb b/tutorials/abcd_opensearch_queries.ipynb new file mode 100644 index 00000000..507289d7 --- /dev/null +++ b/tutorials/abcd_opensearch_queries.ipynb @@ -0,0 +1,1143 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "# Usage of ABCD database with extra information" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + 
"execution_count": 2, + "metadata": { + "pycharm": { + "is_executing": false + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from abcd import ABCD\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "First of all, we need to define the url of the database. It could be local or remote:\n", + "\n", + "- direct access: url = 'opensearch://admin:admin@localhost:9200'\n", + "- api access: url = 'http://localhost/api'\n", + "\n", + "using with statement to catch the riased exceptions. You may can ignore them but in that case need to handle all the unexpected events. (cannot connect to db, lost connection, wrong filter, wrong url, etc. )" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenSearchDatabase(url=localhost:9200, index=atoms) \n" + ] + } + ], + "source": [ + "url = 'opensearch://admin:myStrongPassword123!@localhost:9200'\n", + "abcd = ABCD.from_url(url)\n", + "\n", + "print(abcd)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 2055\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Cleanup \n", + "\n", + "WARNING!! 
Remove all elements from the database.\n", + "Only supported in the case of local access" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.destroy()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.create()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 0\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Uploading configurations" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/ubuntu/abcd/tutorials\n" + ] + } + ], + "source": [ + "from ase.io import iread\n", + "!pwd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data can be entered into the database as ASE Atoms objects, allowing any format readable by ase.io.read to be used." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "directory = Path('/home/ubuntu/data/')\n", + "file = directory / 'input.data.2055.xyz'" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 12.6 s, sys: 152 ms, total: 12.7 s\n", + "Wall time: 18.8 s\n" + ] + } + ], + "source": [ + "%%time\n", + "with abcd as db:\n", + " for atoms in iread(file.as_posix(), index=slice(None)):\n", + " db.push(atoms, store_calc=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.refresh()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example queries" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Text queries" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n", + "316\n" + ] + } + ], + "source": [ + "# Explicit queries via dictionaries\n", + "\n", + "query = {\"match_all\": {}}\n", + "print(abcd.count(query))\n", + "query = {\n", + " \"match\": {\n", + " \"n_atoms\": 114\n", + " }\n", + "}\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n" + ] + } + ], + "source": [ + "# Basic text\n", + "\n", + "query = 'ubuntu'\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n", + "0\n" + ] + } + ], + "source": [ + "# Query specific fields\n", + "\n", + "query = 'username:ubuntu'\n", + "print(abcd.count(query))\n", + "\n", + "query = 'formula:ubuntu'\n", + 
"print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n" + ] + } + ], + "source": [ + "# Range\n", + "\n", + "query = 'username:[ubunta TO ubuntx]'\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n", + "2055\n" + ] + } + ], + "source": [ + "# Wildcards\n", + "\n", + "query = 'ubu?tu'\n", + "print(abcd.count(query))\n", + "\n", + "query = 'username: *'\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "316\n", + "2055\n" + ] + } + ], + "source": [ + "# Logical combinations\n", + "\n", + "query = 'username:[ubunta TO ubuntx] AND formula: C48H28O32Zr6'\n", + "print(abcd.count(query))\n", + "\n", + "query = 'username:[ubunta TO ubuntx] OR formula: C48H28O32Zr6'\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n", + "316\n", + "C48H28O32Zr6\n" + ] + } + ], + "source": [ + "# Regex - wrap with `/`\n", + "\n", + "query = '/u.untu/'\n", + "print(abcd.count(query))\n", + "\n", + "# Search for (something like) C48H28O32Zr6\n", + "# Note: anchored by default, so cannot use ^ and $\n", + "query = 'formula: /C.\\d[G-I]28O32Z\\w[^7]/'\n", + "print(abcd.count(query))\n", + "\n", + "for prop in abcd.property(\"formula\", query):\n", + " print(prop)\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Numerical queries" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# All energies\n", + "\n", + "data = abcd.property('energy')\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# No lower bound\n", + "\n", + "query = \"energy: [* TO -30000]\"\n", + "data = abcd.property('energy', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkEAAAGiCAYAAADgPBIcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAoaklEQVR4nO3df3RU9Z3/8VcSyIQAM9kAySRL+CHKjygIBzSMRUpLSoCAeIxni6UQOSwUTrBH0kVMD4KwW2HBFloWxd2uYlcilR6RBQWK/AhVAmIq5accYaGJhUk4ZJkAXQZCPt8/9stdRgLJkIT8+Dwf59xj5n4+987nnetn8uLOvTMRxhgjAAAAy0Q29gAAAAAaAyEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFgprBD0+uuvq1+/fnK73XK73fL5fNq8ebPTPmzYMEVERIQs06dPD9lHcXGxMjMzFRsbq4SEBM2ePVuVlZX1Uw0AAEAttQqnc+fOnbV48WI98MADMsbo7bff1rhx4/TFF1/owQcflCRNnTpVCxcudLaJjY11fr5+/boyMzPl9Xq1Z88enT17VpMmTVLr1q31yiuv1FNJAAAANYuo6xeoxsfHa+nSpZoyZYqGDRum/v37a/ny5dX23bx5s8aMGaMzZ84oMTFRkrRq1SrNmTNH586dU3R0dF2GAgAAUGthnQm62fXr17Vu3TpdvnxZPp/PWb9mzRq988478nq9Gjt2rF566SXnbFBhYaH69u3rBCBJysjI0IwZM3TkyBENGDCg2ucKBoMKBoPO46qqKpWXl6tDhw6KiIi42xIAAMA9ZIzRxYsXlZycrMjIxr8sOewQdOjQIfl8Pl25ckXt2rXT+vXrlZqaKkn6wQ9+oK5duyo5OVkHDx7UnDlzdPz4cb3//vuSJL/fHxKAJDmP/X7/bZ9z0aJFWrBgQbhDBQAATVBJSYk6d+7c2MMIPwT16tVLBw4cUCAQ0O9+9ztlZ2eroKBAqampmjZtmtOvb9++SkpK0vDhw3Xy5En16NHjrgeZl5en3Nxc53EgEFCXLl1UUlIit9t91/sFAAD3TkVFhVJSUtS+ffvGHoqkuwhB0dHRuv/++yVJAwcO1P79+/XLX/5Sb7zxxi1909LSJEknTpxQjx495PV69dlnn4X0KS0tlSR5vd7bPqfL5ZLL5bpl/Y271AAAQPPRVC5lqfMbclVVVSHX69zswIEDkqSkpCRJks/n06FDh1RWVub02bZtm9xut/OWGgAAwL0Q1pmgvLw8jRo1Sl26dNHFixeVn5+vXbt2aevWrTp58qTy8/M1evRodejQQQcPHtSsWbM0dOhQ9evXT5I0YsQIpaamauLEiVqyZIn8fr/mzp2rnJycas/0AAAANJSwQlBZWZkmTZqks2fPyuPxqF+/ftq6dau+973vqaSkRB9//LGWL1+uy5cvKyUlRVlZWZo7d66zfVRUlDZt2qQZM2bI5/Opbdu2ys7ODvlcIQAAgHuhzp8T1BgqKirk8XgUCAS4JggAgGaiqf39bvyb9AEAABoBIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQ
QAAwEqEIAAAYCVCEAAAsBIhCAAAWCms7w4Daqvbix/W2Of04sx7MBIAAKrHmSAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwUqvGHgAAALi3ur34YY19Ti/OvAcjaVycCQIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASmGFoNdff139+vWT2+2W2+2Wz+fT5s2bnfYrV64oJydHHTp0ULt27ZSVlaXS0tKQfRQXFyszM1OxsbFKSEjQ7NmzVVlZWT/VAAAA1FJYIahz585avHixioqK9Pnnn+u73/2uxo0bpyNHjkiSZs2apY0bN2rdunUqKCjQmTNn9NRTTznbX79+XZmZmbp69ar27Nmjt99+W6tXr9a8efPqtyoAAIAaRBhjTF12EB8fr6VLl+rpp59Wp06dlJ+fr6efflqS9OWXX6pPnz4qLCzU4MGDtXnzZo0ZM0ZnzpxRYmKiJGnVqlWaM2eOzp07p+jo6Fo9Z0VFhTwejwKBgNxud12GjwbS7cUPa+xzenHmPRgJAOCbGus1uqn9/b7ra4KuX7+utWvX6vLly/L5fCoqKtK1a9eUnp7u9Ondu7e6dOmiwsJCSVJhYaH69u3rBCBJysjIUEVFhXM2qTrBYFAVFRUhCwAAQF2EHYIOHTqkdu3ayeVyafr06Vq/fr1SU1Pl9/sVHR2tuLi4kP6JiYny+/2SJL/fHxKAbrTfaLudRYsWyePxOEtKSkq4wwYAAAgRdgjq1auXDhw4oH379mnGjBnKzs7W0aNHG2Jsjry8PAUCAWcpKSlp0OcDAAAtX6twN4iOjtb9998vSRo4cKD279+vX/7yl/r+97+vq1ev6sKFCyFng0pLS+X1eiVJXq9Xn332Wcj+btw9dqNPdVwul1wuV7hDBQAAuK06f05QVVWVgsGgBg4cqNatW2v79u1O2/Hjx1VcXCyfzydJ8vl8OnTokMrKypw+27Ztk9vtVmpqal2HAgAAUGthnQnKy8vTqFGj1KVLF128eFH5+fnatWuXtm7dKo/HoylTpig3N1fx8fFyu9167rnn5PP5NHjwYEnSiBEjlJqaqokTJ2rJkiXy+/2aO3eucnJyONMDAADuqbBCUFlZmSZNmqSzZ8/K4/GoX79+2rp1q773ve9JkpYtW6bIyEhlZWUpGAwqIyNDr732mrN9VFSUNm3apBkzZsjn86lt27bKzs7WwoUL67cqAACAGtT5c4IaQ1P7nAHcis8JAoCmi88J+l98dxgAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwUlghaNGiRXrkkUfUvn17JSQk6Mknn9Tx48dD+gwbNkwREREhy/Tp00P6FBcXKzMzU7GxsUpISNDs2bNVWVlZ92oAAABqqVU4nQsKCpSTk6NHHnlElZWV+ulPf6oRI0bo6NGjatu2rdNv6tSpWrhwofM4NjbW+fn69evKzMyU1+vVnj17dPbsWU2aNEmtW7fWK6+8Ug8lAQAA1CysELRly5aQx6tXr1ZCQoKKioo0dOhQZ31sbKy8Xm+1+/j973+vo0eP6uOPP1ZiYqL69
++vf/zHf9ScOXP08ssvKzo6+i7KAAAACE+drgkKBAKSpPj4+JD1a9asUceOHfXQQw8pLy9Pf/3rX522wsJC9e3bV4mJic66jIwMVVRU6MiRI3UZDgAAQK2FdSboZlVVVXr++ef1rW99Sw899JCz/gc/+IG6du2q5ORkHTx4UHPmzNHx48f1/vvvS5L8fn9IAJLkPPb7/dU+VzAYVDAYdB5XVFTc7bABAAAk1SEE5eTk6PDhw/rkk09C1k+bNs35uW/fvkpKStLw4cN18uRJ9ejR466ea9GiRVqwYMHdDhUAAOAWd/V22MyZM7Vp0ybt3LlTnTt3vmPftLQ0SdKJEyckSV6vV6WlpSF9bjy+3XVEeXl5CgQCzlJSUnI3wwYAAHCEFYKMMZo5c6bWr1+vHTt2qHv37jVuc+DAAUlSUlKSJMnn8+nQoUMqKytz+mzbtk1ut1upqanV7sPlcsntdocsAAAAdRHW22E5OTnKz8/Xhg0b1L59e+caHo/HozZt2ujkyZPKz8/X6NGj1aFDBx08eFCzZs3S0KFD1a9fP0nSiBEjlJqaqokTJ2rJkiXy+/2aO3eucnJy5HK56r9CAACAaoR1Juj1119XIBDQsGHDlJSU5Cy//e1vJUnR0dH6+OOPNWLECPXu3Vs/+clPlJWVpY0bNzr7iIqK0qZNmxQVFSWfz6cf/vCHmjRpUsjnCgEAADS0sM4EGWPu2J6SkqKCgoIa99O1a1d99NFH4Tw1AABAveK7wwAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKYYWgRYsW6ZFHHlH79u2VkJCgJ598UsePHw/pc+XKFeXk5KhDhw5q166dsrKyVFpaGtKnuLhYmZmZio2NVUJCgmbPnq3Kysq6VwMAAFBLYYWggoIC5eTkaO/evdq2bZuuXbumESNG6PLly06fWbNmaePGjVq3bp0KCgp05swZPfXUU0779evXlZmZqatXr2rPnj16++23tXr1as2bN6/+qgIAAKhBhDHG3O3G586dU0JCggoKCjR06FAFAgF16tRJ+fn5evrppyVJX375pfr06aPCwkINHjxYmzdv1pgxY3TmzBklJiZKklatWqU5c+bo3Llzio6OrvF5Kyoq5PF4FAgE5Ha773b4aEDdXvywxj6nF2feg5EAAL6psV6jm9rf7zpdExQIBCRJ8fHxkqSioiJdu3ZN6enpTp/evXurS5cuKiwslCQVFhaqb9++TgCSpIyMDFVUVOjIkSPVPk8wGFRFRUXIAgAAUBd3HYKqqqr0/PPP61vf+pYeeughSZLf71d0dLTi4uJC+iYmJsrv9zt9bg5AN9pvtFVn0aJF8ng8zpKSknK3wwYAAJBUhxCUk5Ojw4cPa+3atfU5nmrl5eUpEAg4S0lJSYM/JwAAaNla3c1GM2fO1KZNm7R792517tzZWe/1enX16lVduHAh5GxQaWmpvF6v0+ezzz4L2d+Nu8du9Pkml8sll8t1N0MFAACoVlhngowxmjlzptavX68dO3aoe/fuIe0DBw5U69attX37dmfd8ePHVVxcLJ/PJ0ny+Xw6dOiQysrKnD7btm2T2+1WampqXWoBAACotbDOBOXk5Cg/P18bNmxQ+/btnWt4PB6P2rRpI4/HoylTpig3N1fx8fFyu9167rnn5PP5NHjwYEnSiBEjlJqaqokTJ2rJkiXy+/2aO3eucnJyO
NsDAADumbBC0Ouvvy5JGjZsWMj6t956S88++6wkadmyZYqMjFRWVpaCwaAyMjL02muvOX2joqK0adMmzZgxQz6fT23btlV2drYWLlxYt0oAAADCEFYIqs1HCsXExGjlypVauXLlbft07dpVH330UThPDQAAUK/47jAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwUtghaPfu3Ro7dqySk5MVERGhDz74IKT92WefVURERMgycuTIkD7l5eWaMGGC3G634uLiNGXKFF26dKlOhQAAAIQj7BB0+fJlPfzww1q5cuVt+4wcOVJnz551lnfffTekfcKECTpy5Ii2bdumTZs2affu3Zo2bVr4owcAALhLrcLdYNSoURo1atQd+7hcLnm93mrbjh07pi1btmj//v0aNGiQJGnFihUaPXq0Xn31VSUnJ4c7JAAAgLA1yDVBu3btUkJCgnr16qUZM2bo/PnzTlthYaHi4uKcACRJ6enpioyM1L59+6rdXzAYVEVFRcgCAABQF/UegkaOHKnf/OY32r59u/75n/9ZBQUFGjVqlK5fvy5J8vv9SkhICNmmVatWio+Pl9/vr3afixYtksfjcZaUlJT6HjYAALBM2G+H1WT8+PHOz3379lW/fv3Uo0cP7dq1S8OHD7+rfebl5Sk3N9d5XFFRQRACAAB10uC3yN93333q2LGjTpw4IUnyer0qKysL6VNZWany8vLbXkfkcrnkdrtDFgAAgLpo8BD09ddf6/z580pKSpIk+Xw+XbhwQUVFRU6fHTt2qKqqSmlpaQ09HAAAAEl38XbYpUuXnLM6knTq1CkdOHBA8fHxio+P14IFC5SVlSWv16uTJ0/qhRde0P3336+MjAxJUp8+fTRy5EhNnTpVq1at0rVr1zRz5kyNHz+eO8MAAMA9E/aZoM8//1wDBgzQgAEDJEm5ubkaMGCA5s2bp6ioKB08eFBPPPGEevbsqSlTpmjgwIH6wx/+IJfL5exjzZo16t27t4YPH67Ro0dryJAh+td//df6qwoAAKAGYZ8JGjZsmIwxt23funVrjfuIj49Xfn5+uE8NAABQb/juMAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGClsEPQ7t27NXbsWCUnJysiIkIffPBBSLsxRvPmzVNSUpLatGmj9PR0ffXVVyF9ysvLNWHCBLndbsXFxWnKlCm6dOlSnQoBAAAIR9gh6PLly3r44Ye1cuXKatuXLFmiX/3qV1q1apX27duntm3bKiMjQ1euXHH6TJgwQUeOHNG2bdu0adMm7d69W9OmTbv7KgAAAMLUKtwNRo0apVGjRlXbZozR8uXLNXfuXI0bN06S9Jvf/EaJiYn64IMPNH78eB07dkxbtmzR/v37NWjQIEnSihUrNHr0aL366qtKTk6uQzkAAAC1U6/XB
J06dUp+v1/p6enOOo/Ho7S0NBUWFkqSCgsLFRcX5wQgSUpPT1dkZKT27dtXn8MBAAC4rbDPBN2J3++XJCUmJoasT0xMdNr8fr8SEhJCB9GqleLj450+3xQMBhUMBp3HFRUV9TlsAABgoWZxd9iiRYvk8XicJSUlpbGHBAAAmrl6DUFer1eSVFpaGrK+tLTUafN6vSorKwtpr6ysVHl5udPnm/Ly8hQIBJylpKSkPocNAAAsVK8hqHv37vJ6vdq+fbuzrqKiQvv27ZPP55Mk+Xw+XbhwQUVFRU6fHTt2qKqqSmlpadXu1+Vyye12hywAAAB1EfY1QZcuXdKJEyecx6dOndKBAwcUHx+vLl266Pnnn9c//dM/6YEHHlD37t310ksvKTk5WU8++aQkqU+fPho5cqSmTp2qVatW6dq1a5o5c6bGjx/PnWEAAOCeCTsEff755/rOd77jPM7NzZUkZWdna/Xq1XrhhRd0+fJlTZs2TRcuXNCQIUO0ZcsWxcTEONusWbNGM2fO1PDhwxUZGamsrCz96le/qodyAAAAaifCGGMaexDhqqiokMfjUSAQ4K2xJqrbix/W2Of04sx7MBIAwDc11mt0U/v73SzuDgMAAKhvhCAAAGAlQhAAALASIQgAAFiJEAQAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKhCAAAGAlQhAAALASIQgAAFiJEAQAAKzUqrEHgPrR7cUPa+xzenHmPRgJAADNA2eCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABYiRAEAACsRAgCAABWIgQBAAArEYIAAICVCEEAAMBKfG0GAAB3ia8sat44EwQAAKxU7yHo5ZdfVkRERMjSu3dvp/3KlSvKyclRhw4d1K5dO2VlZam0tLS+hwEAAHBHDXIm6MEHH9TZs2ed5ZNPPnHaZs2apY0bN2rdunUqKCjQmTNn9NRTTzXEMAAAAG6rQa4JatWqlbxe7y3rA4GA/v3f/135+fn67ne/K0l666231KdPH+3du1eDBw9uiOEAAADcokHOBH311VdKTk7WfffdpwkTJqi4uFiSVFRUpGvXrik9Pd3p27t3b3Xp0kWFhYUNMRQAAIBq1fuZoLS0NK1evVq9evXS2bNntWDBAj3++OM6fPiw/H6/oqOjFRcXF7JNYmKi/H7/bfcZDAYVDAadxxUVFfU9bAAAYJl6D0GjRo1yfu7Xr5/S0tLUtWtXvffee2rTps1d7XPRokVasGBBfQ0RAACg4W+Rj4uLU8+ePXXixAl5vV5dvXpVFy5cCOlTWlpa7TVEN+Tl5SkQCDhLSUlJA48aAAC0dA0egi5duqSTJ08qKSlJAwcOVOvWrbV9+3an/fjx4youLpbP57vtPlwul9xud8gCAABQF/X+dtg//MM/aOzYseratavOnDmj+fPnKyoqSs8884w8Ho+mTJmi3NxcxcfHy+1267nnnpPP5+POMAAAcE/Vewj6+uuv9cwzz+j8+fPq1KmThgwZor1796pTp06SpGXLlikyMlJZWVkKBoPKyMjQa6+9Vt/DAAAAuKN6D0Fr1669Y3tMTIxWrlyplStX1vdTAwAA1BrfHQYAAKxECAIAAFYiBAEAACsRggAAgJUIQQAAwEqEIAAAYCVCEAAAsBIhCAAAWKnePywRgL26vfhhjX1OL868ByMBgJpxJggAAFiJEAQAAKxECAIAAFYiBAEAACtxYTQAoMFx0TyaIs4EAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEneHAfWMu2CAu8Pcwb1GCAJQK7X5A4WWh2CClowQBACoEwIymitCEPD/8S9eALALIQjAPUXYBNBUEILuEi/kANA08fqM2uIWeQAAY
CXOBDUy/sUCoCHw2gLUjBAEAEA1uOut5SMEoUnjX7MAgIZCCGpA/CsCAICmixAEKxBI74zfz51xRhJombg7DAAAWIkzQQCapfo6e8UZHMBehCA0eza/lcPbNABw9whBzYDNf+QB3IrXBKB+EIIA4B7hzF3TQZCERAgCmixepAGgYRGCAFiNsAnYixBUDV4UAQBo+ficIAAAYCXOBAGNgLONuB3+3wDuHc4EAQAAKzVqCFq5cqW6deummJgYpaWl6bPPPmvM4QAAAIs0Wgj67W9/q9zcXM2fP19//OMf9fDDDysjI0NlZWWNNSQAAGCRRgtBv/jFLzR16lRNnjxZqampWrVqlWJjY/Xmm2821pAAAIBFGuXC6KtXr6qoqEh5eXnOusjISKWnp6uwsPCW/sFgUMFg0HkcCAQkSRUVFQ0yvqrgXxtkv42toX5f1anN77A242lqx4Ix3xuM+d5ojmPuMmtdYw8hbPfytbe26us1Olw39mmMqfd93xXTCP7yl78YSWbPnj0h62fPnm0effTRW/rPnz/fSGJhYWFhYWFpAUtJScm9ihx31Cxukc/Ly1Nubq7zuKqqSuXl5erQoYMiIiLuer8VFRVKSUlRSUmJ3G53fQy1ybKpVsmuem2qVbKrXptqleyq16Zapf+rt7i4WBEREUpOTm7sIUlqpLfDOnbsqKioKJWWloasLy0tldfrvaW/y+WSy+UKWRcXF1dv43G73Vb8TyjZVatkV7021SrZVa9NtUp21WtTrZLk8XiaVL2NcmF0dHS0Bg4cqO3btzvrqqqqtH37dvl8vsYYEgAAsEyjvR2Wm5ur7OxsDRo0SI8++qiWL1+uy5cva/LkyY01JAAAYJFGC0Hf//73de7cOc2bN09+v1/9+/fXli1blJiYeM/G4HK5NH/+/FveamuJbKpVsqtem2qV7KrXplolu+q1qVap6dYbYUxTuU8NAADg3uG7wwAAgJUIQQAAwEqEIAAAYCVCEAAAsFKLCEHdunVTREREyLJ48eKQPgcPHtTjjz+umJgYpaSkaMmSJbfsZ926derdu7diYmLUt29fffTRRyHtxhjNmzdPSUlJatOmjdLT0/XVV1+F9CkvL9eECRPkdrsVFxenKVOm6NKlS/VeczAYVP/+/RUREaEDBw44619++eVbfhcRERFq27at02f16tW3tMfExDTZWu9U7+nTp6utd+/evSHbt4Rju2vXLo0bN05JSUlq27at+vfvrzVr1oRs25KOrdRy5u0TTzyhLl26KCYmRklJSZo4caLOnDnjtLe0eVtTvS1p3tZUa0uatzXVKjXDOdtIX9dRr7p27WoWLlxozp496yyXLl1y2gOBgElMTDQTJkwwhw8fNu+++65p06aNeeONN5w+n376qYmKijJLliwxR48eNXPnzjWtW7c2hw4dcvosXrzYeDwe88EHH5g//elP5oknnjDdu3c3//M//+P0GTlypHn44YfN3r17zR/+8Adz//33m2eeeabea/7xj39sRo0aZSSZL774wll/8eLFkN/D2bNnTWpqqsnOznb6vPXWW8btdof08fv9IftvSrXeqd5Tp04ZSebjjz8Oqefq1atOn5ZybH/2s5+ZuXPnmk8//dScOHHCLF++3ERGRpqNGzc6fVrSsW1J8/YXv/iFKSwsNKdPnzaffvqp8fl8xufzOe0tbd7WVG9Lmrc11dqS5m1NtTbHOdtiQtCyZctu2/7aa6+Zv/mbvzHBYNBZN2fOHNOrVy/n8d/93d+ZzMzMkO3S0tLMj370I2OMMVVVVcbr9ZqlS5c67RcuXDAul8u8++67xhhjjh49aiSZ/fv3O302b95sIiIizF/+8pc61Xizjz76yPTu3dscOXLklj8c33TgwAEjyezevdtZ99ZbbxmPx3PbbZpSrcbcud4bL6Z3+h201GNrjDGjR482kydPdh63pGPb0ubtzTZs2GAiIiJC/ujfrCXM25t9s96WNm9vVtOxNab5z9sbv
llrc5yzLeLtMElavHixOnTooAEDBmjp0qWqrKx02goLCzV06FBFR0c76zIyMnT8+HH993//t9MnPT09ZJ8ZGRkqLCyUJJ06dUp+vz+kj8fjUVpamtOnsLBQcXFxGjRokNMnPT1dkZGR2rdvX73UWVpaqqlTp+o//uM/FBsbW2P/X//61+rZs6cef/zxkPWXLl1S165dlZKSonHjxunIkSNOW1OpVap9vU888YQSEhI0ZMgQ/ed//mdIW0s9tpIUCAQUHx8fsq6lHNuWNG9vVl5erjVr1uixxx5T69atq+3T3Oftze5Ub0uYtzerzbGVmve8vaG6WpvjnG0RIejHP/6x1q5dq507d+pHP/qRXnnlFb3wwgtOu9/vv+WTqG889vv9d+xzc/vN292uT0JCQkh7q1atFB8f7/SpC2OMnn32WU2fPj3k4N/OlStXtGbNGk2ZMiVkfa9evfTmm29qw4YNeuedd1RVVaXHHntMX3/9tVPHjdpudi9rlWpXb7t27fTzn/9c69at04cffqghQ4boySefDHlBbYnHVpLee+897d+/P+SrZlrSsW0p8/aGOXPmqG3bturQoYOKi4u1YcOGavs193l7w53qbSnztja1flNznrfSnWttjnO2yYagF198sdoL525evvzyS0n/+z1kw4YNU79+/TR9+nT9/Oc/14oVKxQMBhu5itqpba0rVqzQxYsXlZeXV6v9rl+/XhcvXlR2dnbIep/Pp0mTJql///769re/rffff1+dOnXSG2+80RDl3aI+6+3YsaNyc3OVlpamRx55RIsXL9YPf/hDLV269J7UUpOGOrY7d+7U5MmT9W//9m968MEHnfUt6dg2deG8RknS7Nmz9cUXX+j3v/+9oqKiNGnSJJlqPrC/uc/bG+5Ub0uZtzfU9tg2xXnbULU2F4323WE1+clPfqJnn332jn3uu+++atenpaWpsrJSp0+fVq9eveT1elVaWhrS58Zjr9fr/Le6Pje331iXlJQU0qd///5On7KyspB9VFZWqry83Nm+LrXu2LFDhYWFt3z3yqBBgzRhwgS9/fbbIet//etfa8yYMTV+H1vr1q01YMAAnThxosFrlRqu3hvS0tK0bds253FLO7YFBQUaO3asli1bpkmTJt1x38352LaUeXtDx44d1bFjR/Xs2VN9+vRRSkqK9u7dK5/PF7JNc5+34dZ7Q3Oct+HU2lTnbX3W2tTnbLXCuoKomXjnnXdMZGSkKS8vN8b838VaN1+olpeXd8vFWmPGjAnZj8/nu+VirVdffdVpDwQC1V6s9fnnnzt9tm7dWm8Xpv35z382hw4dcpatW7caSeZ3v/udKSkpCen7X//1XyYiIiLkDoTbqaysNL169TKzZs1qMrUaE169N/v7v/97M2DAAOdxSzq2O3fuNG3btjX/8i//Uqv9Nudj21LmbXX+/Oc/G0lm586dIetbwrytzu3qvVlznLfVqa7WljJvv+mbtTbHOdvsQ9CePXvMsmXLzIEDB8zJkyfNO++8Yzp16mQmTZrk9Llw4YJJTEw0EydONIcPHzZr1641sbGxt9y216pVK/Pqq6+aY8eOmfnz51d7215cXJzZsGGDOXjwoBk3bly1t+0NGDDA7Nu3z3zyySfmgQceaLBbi+90h8XcuXNNcnKyqaysvKVtwYIFZuvWrebkyZOmqKjIjB8/3sTExJgjR4402VqNqb7e1atXm/z8fHPs2DFz7Ngx87Of/cxERkaaN9980+nTUo7tjh07TGxsrMnLywu5lfb8+fNOn5Z0bFvKvN27d69ZsWKF+eKLL8zp06fN9u3bzWOPPWZ69Ohhrly5EtK3Jczb2tTbUuZtbWptKfO2NrU2xznb7ENQUVGRSUtLMx6Px8TExJg+ffqYV1555ZYXlz/96U9myJAhxuVymb/92781ixcvvmVf7733nunZs6eJjo42Dz74oPnwww9D2quqqsxLL71kEhMTjcvlMsOHDzfHjx8P6XP+/
HnzzDPPmHbt2hm3220mT55sLl68WP+Fm9uHoOvXr5vOnTubn/70p9Vu9/zzz5suXbqY6Ohok5iYaEaPHm3++Mc/hvRparUac/sQ1KdPHxMbG2vcbrd59NFHzbp1627ZtiUc2+zsbCPpluXb3/6206clHVtjWsa8PXjwoPnOd75j4uPjjcvlMt26dTPTp083X3/9dUi/ljJva1NvS5m3tam1pczb2v5/3NzmbIQxzfiKJgAAgLvUZO8OAwAAaEiEIAAAYCVCEAAAsBIhCAAAWIkQBAAArEQIAgAAViIEAQAAKxGCAACAlQhBAADASoQgAABgJUIQAACwEiEIAABY6f8BuZoJxjSNTGkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Upper and lower bound\n", + "\n", + "query = \"energy: [-50000 TO -30000]\"\n", + "data = abcd.property('energy', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# No upper bound\n", + "\n", + "query = \"energy: [-50000 TO *]\"\n", + "data = abcd.property('energy', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# All values of an array\n", + "\n", + "query = None\n", + "data = abcd.property('forces', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1356\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAePklEQVR4nO3de3BU9fnH8c8GTAiQTUggCanh5g1QQSbUEEpb0EiIEWWMVixFsBGKE3Q0FiRTDGpVqPeRqthWuXSkXsYpKtjYFBRQYsQotWSUCg0TMGxgiGQJLUkg5/dHh+0vgpDLbs6T5f2aOTPunpPd5zsB8vbsno3HcRxHAAAAhkS4PQAAAMC3ESgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwp7vbA7RHc3OzqqurFRMTI4/H4/Y4AACgFRzH0eHDh5WSkqKIiNOfI+mSgVJdXa3U1FS3xwAAAO2wZ88enXvuuac9pksGSkxMjKT/LtDr9bo8DQAAaA2/36/U1NTAz/HT6ZKBcuJlHa/XS6AAANDFtObtGbxJFgAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCnu9sDAACA4Bm0YN0Zj9m9JKcTJukYzqAAAABzCBQAAGAOgQIAAMwhUAAAgDkECgAAMIereAAAOMt0hSt9OIMCAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgTne3BwAAAGc2aME6t0foVJxBAQAA5hAoAADAHAIFAACYQ6AAAABzCBQAAGAOgQIAAMwhUAAAgDkECgAAMIdAAQAA5hAoAADAHAIFAACYQ6AAAABz2hQoixcv1ve//33FxMQoMTFRU6ZM0Y4dO1occ/ToUeXn5yshIUG9e/dWbm6uampqWhxTVVWlnJwc9ezZU4mJiZo3b56OHTvW8dUAAICw0KZA2bhxo/Lz8/XRRx+ppKRETU1Nmjhxoo4cORI45u6779bbb7+t119/XRs3blR1dbWuv/76wP7jx48rJydHjY2N2rJli1auXKkVK1aoqKgoeKsCAABdmsdxHKe9X3zgwAElJiZq48aN+tGPfqS6ujr169dPq1ev1g033CBJ+vLLLzVs2DCVlpZqzJgx+stf/qJrrrlG1dXVSkpKkiQtW7ZM9957rw4cOKDIyMgzPq/f71dsbKzq6urk9XrbOz4AAF3GoAXrOvX5di/JCfpjtuXnd4feg1JXVydJio+PlySVl5erqalJmZmZgWOGDh2qAQMGqLS0VJJUWlqqSy+9NBAnkpSVlSW
/36+KiopTPk9DQ4P8fn+LDQAAhK92B0pzc7Puuusu/eAHP9All1wiSfL5fIqMjFRcXFyLY5OSkuTz+QLH/P84ObH/xL5TWbx4sWJjYwNbampqe8cGAABdQLsDJT8/X9u3b9crr7wSzHlOqbCwUHV1dYFtz549IX9OAADgnu7t+aK5c+dq7dq12rRpk84999zA/cnJyWpsbNShQ4danEWpqalRcnJy4JiPP/64xeOduMrnxDHfFhUVpaioqPaMCgAAuqA2nUFxHEdz587Vn//8Z23YsEGDBw9usT8tLU3nnHOO1q9fH7hvx44dqqqqUkZGhiQpIyND//jHP7R///7AMSUlJfJ6vRo+fHhH1gIAAMJEm86g5Ofna/Xq1XrzzTcVExMTeM9IbGysoqOjFRsbq7y8PBUUFCg+Pl5er1d33HGHMjIyNGbMGEnSxIkTNXz4cE2fPl2PPvqofD6fFi5cqPz8fM6SAAAASW0MlOeff16SNH78+Bb3L1++XDNnzpQkPfXUU4qIiFBubq4aGhqUlZWl5557LnBst27dtHbtWt1+++3KyMhQr169NGPGDD344IMdWwkAAAgbHfocFLfwOSgAgLMNn4MCAADgMgIFAACYQ6AAAABzCBQAAGAOgQIAAMwhUAAAgDkECgAAMIdAAQAA5hAoAADAHAIFAACYQ6AAAABzCBQAAGAOgQIAAMwhUAAAgDkECgAAMIdAAQAA5hAoAADAHAIFAACYQ6AAAABzCBQAAGAOgQIAAMwhUAAAgDkECgAAMIdAAQAA5hAoAADAHAIFAACYQ6AAAABzCBQAAGAOgQIAAMzp7vYAAACc7QYtWOf2COZwBgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJjT5kDZtGmTJk+erJSUFHk8Hq1Zs6bF/pkzZ8rj8bTYJk2a1OKY2tpaTZs2TV6vV3FxccrLy1N9fX2HFgIAAMJHmwPlyJEjGjlypJ599tnvPGbSpEnat29fYPvTn/7UYv+0adNUUVGhkpISrV27Vps2bdLs2bPbPj0AAAhL3dv6BdnZ2crOzj7tMVFRUUpOTj7lvi+++ELFxcXaunWrRo8eLUlaunSprr76aj3++ONKSUlp60gAACDMhOQ9KO+//74SExN10UUX6fbbb9fBgwcD+0pLSxUXFxeIE0nKzMxURESEysrKTvl4DQ0N8vv9LTYAABC+gh4okyZN0qpVq7R+/Xr95je/0caNG5Wdna3jx49Lknw+nxITE1t8Tffu3RUfHy+fz3fKx1y8eLFiY2MDW2pqarDHBgAAhrT5JZ4zmTp1auC/L730Uo0YMULnnXee3n//fV155ZXteszCwkIVFBQEbvv9fiIFAIAwFvLLjIcMGaK+fftq586dkqTk5GTt37+/xTHHjh1TbW3td75vJSoqSl6vt8UGAADCV8gDZe/evTp48KD69+8vScrIyNChQ4dUXl4eOGbDhg1qbm5Wenp6qMcBAABdQJtf4qmvrw+cDZGkyspKbdu2TfHx8YqPj9cDDzyg3NxcJScna9euXZo/f77OP/98ZWVlSZKGDRumSZMmadasWVq2bJmampo0d+5cTZ06lSt4AACApHacQfnkk080atQojRo1SpJUUFCgUaNGqaioSN26ddPnn3+ua6+9VhdeeKHy8vKUlpamzZs3KyoqKvAYL7/8soYOHaorr7xSV199tcaNG6ff/e53wVsVAADo0tp8BmX8+PFyHOc797/77rtnfIz4+HitXr26rU8NAADOEvwuHgAAYA6BAgAAzCFQAAC
AOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYE53twcAACCcDVqwzu0RuiTOoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwp82BsmnTJk2ePFkpKSnyeDxas2ZNi/2O46ioqEj9+/dXdHS0MjMz9dVXX7U4pra2VtOmTZPX61VcXJzy8vJUX1/foYUAAIDw0eZAOXLkiEaOHKlnn332lPsfffRRPfPMM1q2bJnKysrUq1cvZWVl6ejRo4Fjpk2bpoqKCpWUlGjt2rXatGmTZs+e3f5VAACAsNK9rV+QnZ2t7OzsU+5zHEdPP/20Fi5cqOuuu06StGrVKiUlJWnNmjWaOnWqvvjiCxUXF2vr1q0aPXq0JGnp0qW6+uqr9fjjjyslJaUDywEAAOEgqO9BqayslM/nU2ZmZuC+2NhYpaenq7S0VJJUWlqquLi4QJxIUmZmpiIiIlRWVnbKx21oaJDf72+xAQCA8NXmMyin4/P5JElJSUkt7k9KSgrs8/l8SkxMbDlE9+6Kj48PHPNtixcv1gMPPBDMUQEA6LBBC9a5PULY6hJX8RQWFqquri6w7dmzx+2RAABACAU1UJKTkyVJNTU1Le6vqakJ7EtOTtb+/ftb7D927Jhqa2sDx3xbVFSUvF5viw0AAISvoAbK4MGDlZycrPXr1wfu8/v9KisrU0ZGhiQpIyNDhw4dUnl5eeCYDRs2qLm5Wenp6cEcBwAAdFFtfg9KfX29du7cGbhdWVmpbdu2KT4+XgMGDNBdd92lhx56SBdccIEGDx6s++67TykpKZoyZYokadiwYZo0aZJmzZqlZcuWqampSXPnztXUqVO5ggcAAEhqR6B88sknmjBhQuB2QUGBJGnGjBlasWKF5s+fryNHjmj27Nk6dOiQxo0bp+LiYvXo0SPwNS+//LLmzp2rK6+8UhEREcrNzdUzzzwThOUAAIBw4HEcx3F7iLby+/2KjY1VXV0d70cBALgmnK/i2b0kJ+iP2Zaf313iKh4AAHB2IVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCnu9sDAABg0aAF69we4azGGRQAAGAOgQIAAMwhUAAAgDkECgAAMIdAAQAA5hAoAADAHAIFAACYQ6AAAABzCBQAAGAOgQIAAMwhUAAAgDkECgAAMIdAAQAA5hAoAADAHAIFAACYQ6AAAABzCBQAAGAOgQIAAMzp7vYAAAB0tkEL1rk9As6AMygAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmBD1Q7r//fnk8nhbb0KFDA/uPHj2q/Px8JSQkqHfv3srNzVVNTU2wxwAAAF1YSM6gXHzxxdq3b19g++CDDwL77r77br399tt6/fXXtXHjRlVXV+v6668PxRgAAKC
LCslH3Xfv3l3Jyckn3V9XV6cXX3xRq1ev1hVXXCFJWr58uYYNG6aPPvpIY8aMCcU4AACgiwnJGZSvvvpKKSkpGjJkiKZNm6aqqipJUnl5uZqampSZmRk4dujQoRowYIBKS0u/8/EaGhrk9/tbbAAAIHwFPVDS09O1YsUKFRcX6/nnn1dlZaV++MMf6vDhw/L5fIqMjFRcXFyLr0lKSpLP5/vOx1y8eLFiY2MDW2pqarDHBgAAhgT9JZ7s7OzAf48YMULp6ekaOHCgXnvtNUVHR7frMQsLC1VQUBC47ff7iRQAAMJYyC8zjouL04UXXqidO3cqOTlZjY2NOnToUItjampqTvmelROioqLk9XpbbAAAIHyFPFDq6+u1a9cu9e/fX2lpaTrnnHO0fv36wP4dO3aoqqpKGRkZoR4FAAB0EUF/ieeXv/ylJk+erIEDB6q6ulqLFi1St27ddPPNNys2NlZ5eXkqKChQfHy8vF6v7rjjDmVkZHAFDwAACAh6oOzdu1c333yzDh48qH79+mncuHH66KOP1K9fP0nSU089pYiICOXm5qqhoUFZWVl67rnngj0GAADowjyO4zhuD9FWfr9fsbGxqqur4/0oAIA2G7RgndsjmLd7SU7QH7MtP7/5XTwAAMAcAgUAAJhDoAAAAHMIFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJgT9N/FAwCAm/gY+/DAGRQAAGAOgQIAAMwhUAAAgDkECgAAMIdAAQAA5hAoAADAHAIFAACYQ6AAAABzCBQAAGAOgQIAAMwhUAAAgDkECgAAMIdAAQAA5hAoAADAHAIFAACYQ6AAAABzCBQAAGAOgQIAAMwhUAAAgDkECgAAMIdAAQAA5nR3ewAAAFpr0IJ1bo+ATsIZFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmcBUPAMAErtDB/8cZFAAAYA6BAgAAzCFQAACAOQQKAAAwh0ABAADmECgAAMAcAgUAAJhDoAAAAHMIFAAAYA6fJAsACDk+JRZtxRkUAABgDoECAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwuMwYAdAiXECMUOIMCAADM4QwKAISh1pzV2L0kpxMmAdqHQAGAsxQRA8t4iQcAAJhDoAAAAHN4iQcAzqAzXwrhZRfgvziDAgAAzHE1UJ599lkNGjRIPXr0UHp6uj7++GM3xwEAAEa49hLPq6++qoKCAi1btkzp6el6+umnlZWVpR07digxMdGtsTodp3MBd3Xmh4wF67m64sxAW7l2BuXJJ5/UrFmzdOutt2r48OFatmyZevbsqZdeesmtkQAAgBGunEFpbGxUeXm5CgsLA/dFREQoMzNTpaWlJx3f0NCghoaGwO26ujpJkt/vD8l8lyx694zHbH8gKyjP1dzw7zMeE6p1wj2d+WesKz5Xax7Hmtb8PW3N33fAilD87DnxmI7jnPlgxwVff/21I8nZsmVLi/vnzZvnXH755Scdv2jRIkcSGxsbGxsbWxhse/bsOWMrdInLjAsLC1VQUBC43dzcrNraWiUkJMjj8cjv9ys1NVV79uyR1+t1cdLQY63h52xZp8Raw9HZsk6JtQaD4zg6fPiwUlJSznisK4HSt29fdevWTTU1NS3ur6mpUXJy8knHR0VFKSoqqsV9cXFxJx3n9XrD/g/NCaw1/Jwt65RYazg6W9YpsdaOio2NbdVxrrxJNjIyUmlpaVq/fn3gvubmZq1fv14ZGRlujAQAAAxx7SWegoICzZgxQ6NHj9bll1+up59+WkeOHNGtt97q1kgAAMAI1wLlpptu0oEDB1RUVCSfz6fLLrtMxcXFSkpKavNjRUVFadGiRSe9DBSOWGv4OVvWKbHWcHS2rFNirZ3N4zitudYHAACg8/C7eAAAgDkECgAAMIdAAQAA5hAoAADAnC4fKA8//LDGjh2rnj17nvLD2yTpzjvvVFpamqKionTZZZd16nzB1Jq
1VlVVKScnRz179lRiYqLmzZunY8eOde6gIfDpp5/qqquuUlxcnBISEjR79mzV19e7PVbQ/fOf/9R1112nvn37yuv1aty4cXrvvffcHivo3n//fXk8nlNuW7dudXu8kFi3bp3S09MVHR2tPn36aMqUKW6PFBKDBg066Xu6ZMkSt8cKmYaGBl122WXyeDzatm2b2+OExLXXXqsBAwaoR48e6t+/v6ZPn67q6uqQP2+XD5TGxkbdeOONuv3220973M9//nPddNNNnTRVaJxprcePH1dOTo4aGxu1ZcsWrVy5UitWrFBRUVEnTxpc1dXVyszM1Pnnn6+ysjIVFxeroqJCM2fOdHu0oLvmmmt07NgxbdiwQeXl5Ro5cqSuueYa+Xw+t0cLqrFjx2rfvn0ttttuu02DBw/W6NGj3R4v6N544w1Nnz5dt956q/7+97/rww8/1E9/+lO3xwqZBx98sMX39o477nB7pJCZP39+qz62vSubMGGCXnvtNe3YsUNvvPGGdu3apRtuuCH0TxycX//nvuXLlzuxsbGnPWbRokXOyJEjO2WeUPqutb7zzjtORESE4/P5Avc9//zzjtfrdRoaGjpxwuB64YUXnMTEROf48eOB+z7//HNHkvPVV1+5OFlwHThwwJHkbNq0KXCf3+93JDklJSUuThZ6jY2NTr9+/ZwHH3zQ7VGCrqmpyfne977n/OEPf3B7lE4xcOBA56mnnnJ7jE7xzjvvOEOHDnUqKiocSc5nn33m9kid4s0333Q8Ho/T2NgY0ufp8mdQ8D+lpaW69NJLW3zYXVZWlvx+vyoqKlycrGMaGhoUGRmpiIj//XGNjo6WJH3wwQdujRV0CQkJuuiii7Rq1SodOXJEx44d0wsvvKDExESlpaW5PV5IvfXWWzp48GBYfpL0p59+qq+//loREREaNWqU+vfvr+zsbG3fvt3t0UJmyZIlSkhI0KhRo/TYY4+FxcvM31ZTU6NZs2bpj3/8o3r27On2OJ2mtrZWL7/8ssaOHatzzjknpM9FoIQRn8930ifxnrjdlV8iuOKKK+Tz+fTYY4+psbFR33zzjRYsWCBJ2rdvn8vTBY/H49Hf/vY3ffbZZ4qJiVGPHj305JNPqri4WH369HF7vJB68cUXlZWVpXPPPdftUYLuX//6lyTp/vvv18KFC7V27Vr16dNH48ePV21trcvTBd+dd96pV155Re+9955+8Ytf6JFHHtH8+fPdHiuoHMfRzJkzNWfOnLB8SfJU7r33XvXq1UsJCQmqqqrSm2++GfLnNBkoCxYs+M430J3YvvzyS7fHDIqzaa3f1tq1X3zxxVq5cqWeeOIJ9ezZU8nJyRo8eLCSkpJanFWxqrXrdBxH+fn5SkxM1ObNm/Xxxx9rypQpmjx5cpcJsfb8ed67d6/effdd5eXluTR1+7R2rc3NzZKkX/3qV8rNzVVaWpqWL18uj8ej119/3eVVtE5bvq8FBQUaP368RowYoTlz5uiJJ57Q0qVL1dDQ4PIqzqy161y6dKkOHz6swsJCt0dut7b+XZ03b54+++wz/fWvf1W3bt10yy23yAnxB9Gb/Kj7AwcO6ODBg6c9ZsiQIYqMjAzcXrFihe666y4dOnToO7/m/vvv15o1a0y90zqYay0qKtJbb73VYn2VlZUaMmSIPv30U40aNSqYo3dYe9ZeU1OjXr16yePxyOv16pVXXtGNN94Y6lE7pLXr3Lx5syZOnKhvvvmmxa83v+CCC5SXlxc4a2RZe76nv/71r7V06VJ9/fXXIT9lHEytXeuHH36oK664Qps3b9a4ceMC+9LT05WZmamHH3441KN2WHu+rydUVFTokksu0ZdffqmLLrooVCMGRWvX+ZOf/ERvv/22PB5P4P7jx4+rW7dumjZtmlauXBnqUTusI9/TvXv3KjU1VVu2bFFGRkaoRnTvlwWeTr9+/dSvXz+3x+gUwVxrRkaGHn74Ye3fv1+JiYmSpJKSEnm9Xg0fPjwozxF
M7Vn7iZesXnrpJfXo0UNXXXVVKEYLqtau89///rcknXRWKCIiIvB/4da19XvqOI6WL1+uW265pUvFidT6tZ74iIMdO3YEAqWpqUm7d+/WwIEDQz1mUHTk36lt27YpIiIi8G+SZa1d5zPPPKOHHnoocLu6ulpZWVl69dVXlZ6eHsoRg6Yj39MT/x6F+qyYyUBpi6qqKtXW1qqqqkrHjx8PnD04//zz1bt3b0nSzp07VV9fL5/Pp//85z+BY4YPH37KOrTqTGudOHGihg8frunTp+vRRx+Vz+fTwoULlZ+f3+V/++Zvf/tbjR07Vr1791ZJSYnmzZunJUuWfOfnwXRFGRkZ6tOnj2bMmKGioiJFR0fr97//vSorK5WTk+P2eCGxYcMGVVZW6rbbbnN7lJDxer2aM2eOFi1apNTUVA0cOFCPPfaYJJk/+9dWpaWlKisr04QJExQTE6PS0lLdfffd+tnPfhZW76MaMGBAi9snftacd955Yfc+qrKyMm3dulXjxo1Tnz59tGvXLt13330677zzQnr2RFLXv8x4xowZjqSTtvfeey9wzI9//ONTHlNZWena3O3RmrXu3r3byc7OdqKjo52+ffs699xzj9PU1OTe0EEyffp0Jz4+3omMjHRGjBjhrFq1yu2RQmLr1q3OxIkTnfj4eCcmJsYZM2aM884777g9VsjcfPPNztixY90eI+QaGxude+65x0lMTHRiYmKczMxMZ/v27W6PFXTl5eVOenq6Exsb6/To0cMZNmyY88gjjzhHjx51e7SQqqysDNvLjD///HNnwoQJTnx8vBMVFeUMGjTImTNnjrN3796QP7fJ96AAAICzm/1LIAAAwFmHQAEAAOYQKAAAwBwCBQAAmEOgAAAAcwgUAABgDoECAADMIVAAAIA5BAoAADCHQAEAAOYQKAAAwBwCBQAAmPN/RJGEXiB1njkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Range for an array (match if any values of array lie in range)\n", + "\n", + "query = \"forces: [-5 TO -3]\"\n", + "print(abcd.count(query))\n", + "data = abcd.property('forces', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Script queries" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "forces = [atoms.arrays[\"forces\"] for atoms in abcd.get_atoms()]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(114, 3)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "forces[0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "data = [np.mean(force) for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"Mean\")\n", + "plt.show()\n", + "\n", + "data = [force.min() for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"Min\")\n", + "plt.show()\n", + "\n", + "data = [force.max() for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"Max\")\n", + "plt.show()\n", + "\n", + "data = [force[0][0] for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"First force\")\n", + "plt.show()\n", + "\n", + "data = [force[-1][-1] for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"Last force\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Visualise forces\n", + "\n", + "query = None\n", + "data = abcd.property('forces', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.show()\n", + "\n", + "data = abcd.count_property(\"forces\", query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "366\n" + ] + } + ], + "source": [ + "count = 0\n", + "for force in forces:\n", + " if np.sum(np.logical_and((force < 3), force > -3)) == force.size:\n", + " count +=1\n", + "print(count)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "366\n" + ] + } + ], + "source": [ + "# Check all forces for each structure are between -3 and 3\n", + "\n", + "script = \"\"\"\n", + "int count = 0;\n", + "for (int i=0; i 3 || doc.forces[i] < -3) {\n", + " count += 1; \n", + " }\n", + "}\n", + "return count == 0;\n", + "\"\"\"\n", + "\n", + "query = {\n", + " \"bool\": {\n", + " \"filter\": {\n", + " \"script\": {\n", + " \"script\": script\n", + " }\n", + " }\n", + " },\n", + "}\n", + "\n", + "\n", + "print(abcd.count(query=query, timeout=90))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1321\n" + ] + } + ], + "source": [ + "# Check forces no greater than 3\n", + "\n", + "count = 0\n", + "for force in forces:\n", + " if force.max() > 3:\n", + " count +=1\n", + "print(count)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1321\n" + ] + } + ], + "source": [ + "# Check forces no greater than 3\n", + "\n", + "script = \"\"\"\n", + 
"double max = doc.forces[0];\n", + "for (int i=1; i 3.0;\n", + "\"\"\"\n", + "\n", + "\n", + "query = {\n", + " \"bool\": {\n", + " \"filter\": {\n", + " \"script\": {\n", + " \"script\": script\n", + " }\n", + " }\n", + " },\n", + "}\n", + "\n", + "\n", + "print(abcd.count(query=query, timeout=60))" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1010\n" + ] + } + ], + "source": [ + "# Check average force less than 0\n", + "\n", + "count = 0\n", + "for force in forces:\n", + " if np.mean(force) < 0.0:\n", + " count +=1\n", + "print(count)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1005\n" + ] + } + ], + "source": [ + "# Check average force less than 0\n", + "\n", + "script = \"\"\"\n", + "double force = 0;\n", + "for (int i=0; i Date: Tue, 30 Apr 2024 13:38:05 +0000 Subject: [PATCH 067/112] Allow multiple properties to be returned --- abcd/backends/atoms_opensearch.py | 116 ++++++++++++++++++++++-------- 1 file changed, 86 insertions(+), 30 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 8e66bfd5..bb1eb1af 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -540,53 +540,109 @@ def count(self, query: Union[dict, str, None] = None, timeout=30.0) -> int: "count" ] - def property(self, name, query: Union[dict, str, None] = None) -> list: + def _get_props_from_source( + self, + names: Union[str, list[str]], + query: Union[dict, str, None] = None, + ) -> dict: + """ + Gets all values of specified properties using the original data from _source. + + Parameters + ---------- + names: Union[str, list[str]] + Name or list of names of properties to return. + query: Union[dict, str, None] + Query to filter documents to get properties from. Default is `None`. 
+ + Returns + ------- + dict + Dictionary of lists of values for the specified properties. + """ + props = {} + hits = [ + dict(hit["_source"].items()) + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + stored_fields=names, + _source=names, + ) + if "_source" in hit and all(name in hit["_source"] for name in names) + ] + for name in names: + props[name] = [hit[name] for hit in hits] + return props + + def property( + self, + names: Union[str, list[str]], + allow_flatten: bool = True, + query: Union[dict, str, None] = None, + ) -> Union[dict, list]: """ - Gets all values of a specified property for matching documents in the - database. Alternative methods, such as count_property, may be faster. + Gets all values of specified properties for matching documents in the database. Parameters ---------- + names: Union[str, list[str]] + Name or list of names of properties to return. + allow_flatten: bool = True + Whether to allow arrays to be returned flattened. There is no guarantee + for the order of returned values. Default is `True`. query: Union[dict, str, None] Query to filter documents to get properties from. Default is `None`. Returns ------- - list - List of values for the specified property for all matching documents. + Union[dict, list] + Dictionary of lists of values for the specified properties, or list + if only one property is given. 
""" query = self.parser(query) query = { "query": query, } + if isinstance(names, str): + names = [names] + names = [format(name) for name in names] + # Try to use docvalue_fields to avoid loading entire document # But not all datatypes supported by default - try: - return [ - hit["fields"][format(name)][0] - for hit in helpers.scan( - self.client, - index=self.index_name, - query=query, - _source=False, - stored_fields="_none_", - docvalue_fields=[format(name)], - ) - if "fields" in hit and format(name) in hit["fields"] - ] - except RequestError: - return [ - hit["_source"][format(name)] - for hit in helpers.scan( - self.client, - index=self.index_name, - query=query, - stored_fields=format(name), - _source=format(name), - ) - if format(name) in hit["_source"] - ] + if allow_flatten: + props = {} + try: + hits = [ + dict(hit["fields"].items()) + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + _source=False, + stored_fields="_none_", + docvalue_fields=names, + ) + if "fields" in hit and all(name in hit["fields"] for name in names) + ] + for name in names: + props[name] = [ + hit[name][0] if len(hit[name]) == 1 else hit[name] + for hit in hits + ] + + except RequestError: + props = self._get_props_from_source(names, query) + + # Use _source to ensure arrays are not flattened + else: + props = self._get_props_from_source(names, query) + + if len(names) == 1: + return props[names[0]] + return props def count_property(self, name, query: Union[dict, str, None] = None) -> dict: """ From 16a12487c0bee9acd46116b24228812ae0971455 Mon Sep 17 00:00:00 2001 From: ElliottKasoar Date: Tue, 30 Apr 2024 13:38:30 +0000 Subject: [PATCH 068/112] Fix CLI code formatting --- abcd/frontends/commandline/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index faa7233e..1bf9ebed 100644 --- a/abcd/frontends/commandline/commands.py +++ 
b/abcd/frontends/commandline/commands.py @@ -188,7 +188,7 @@ def key_add(*, db, query, keys, **kwargs): test = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) else: if isinstance(db, OpenSearchDatabase): - test = ' OR '.join([f"{key}:*" for key in data.keys()]) + test = " OR ".join([f"{key}:*" for key in data.keys()]) else: test = ("OR", *(("NAME", key) for key in data.keys())) From 1cb7ab24f0dcdd320b659286a62ab4ac9ecb4d2c Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:56:28 +0200 Subject: [PATCH 069/112] Apply suggestions from code review Co-authored-by: Jacob Wilkins <46597752+oerc0122@users.noreply.github.com> --- abcd/__init__.py | 3 ++- abcd/backends/atoms_opensearch.py | 28 +++++++++++++--------------- abcd/backends/atoms_properties.py | 13 ++++++------- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/abcd/__init__.py b/abcd/__init__.py index 2bb82cf9..8fe111f7 100644 --- a/abcd/__init__.py +++ b/abcd/__init__.py @@ -45,7 +45,8 @@ def from_url(cls, url, **kwargs): from abcd.backends.atoms_pymongo import MongoDatabase return MongoDatabase(db_name=db, host=r.geturl(), uri_mode=True, **kwargs) - if r.scheme == "opensearch": + r.scheme = ConnectionType[r.scheme] + if r.scheme is ConnectionType.opensearch: conn_settings = { "host": r.hostname, "port": r.port, diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index bb1eb1af..78af1b08 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -196,7 +196,7 @@ def save(self): if not self._id: self._client.index(index=self._index_name, body=body) else: - body.pop("_id", None) + del body["_id"] body = {"doc": body} self._client.update(index=self._index_name, id=self._id, body=body) @@ -281,11 +281,11 @@ def __init__( info = self.client.info() logger.info("DB info: %s", info) - except AuthenticationException: - raise abcd.errors.AuthenticationError() + 
except AuthenticationException as err: + raise abcd.errors.AuthenticationError() from err - except ConnectionTimeout: - raise abcd.errors.TimeoutError() + except ConnectionTimeout as err: + raise abcd.errors.TimeoutError() from err self.db = db_name self.index_name = index_name @@ -677,9 +677,9 @@ def count_property(self, name, query: Union[dict, str, None] = None) -> dict: prop = {} - for val in self.client.search(index=self.index_name, body=body,)[ - "aggregations" - ][format(name)]["buckets"]: + for val in self.client.search( + index=self.index_name, body=body + )["aggregations"][format(name)]["buckets"]: prop[val["key"]] = val["doc_count"] return prop @@ -728,8 +728,7 @@ def properties(self, query: Union[dict, str, None] = None) -> dict: body=body, ) - derived = ["info_keys", "derived_keys", "arrays_keys"] - for label in derived: + for label in ("info_keys", "derived_keys", "arrays_keys"): count = res["aggregations"][label]["doc_count"] if count > 0: key = label.split("_", maxsplit=1)[0] @@ -824,8 +823,7 @@ def count_properties(self, query: Union[dict, str, None] = None) -> dict: body=body, ) - derived = ["info_keys", "derived_keys", "arrays_keys"] - for label in derived: + for label in ("info_keys", "derived_keys", "arrays_keys"): count = res["aggregations"][label]["doc_count"] if count > 0: properties[key] = { @@ -980,9 +978,9 @@ def __repr__(self): host, port = None, None return ( - "{}(".format(self.__class__.__name__) - + "url={}:{}, ".format(host, port) - + "index={}) ".format(self.index_name) + f"{self.__class__.__name__}(" + f"url={host}:{port}, " + f"index={self.index_name}) " ) def _repr_html_(self): diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py index 5503c398..5ce1ccb2 100644 --- a/abcd/backends/atoms_properties.py +++ b/abcd/backends/atoms_properties.py @@ -95,7 +95,7 @@ def __init__( self.df.replace({np.nan: None}, inplace=True) if units is not None: - for key in units.keys(): + for key in units: if key not in 
self.df.columns.values: raise ValueError( f"Invalid field name: {key}. Keys in `units` must " @@ -109,20 +109,19 @@ def __init__( self.store_struct_file = store_struct_file if self.store_struct_file: - if struct_file_template is not None: - self.struct_file_template = struct_file_template - else: + if struct_file_template is None: raise ValueError( "`struct_file_template` must be specified if " "store_struct_file is True." ) - if struct_name_label is not None: - self.struct_name_label = struct_name_label - else: + self.struct_file_template = struct_file_template + + if struct_name_label is None: raise ValueError( "`struct_name_label` must be specified if store_struct_file is" " True." ) + self.struct_name_label = struct_name_label self.set_struct_files() def _separate_units(self): From b79779dc36f90d490d3ed9b871445a2c51cd1f01 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 08:20:04 +0100 Subject: [PATCH 070/112] Tidy for flake8 --- .flake8 | 2 +- abcd/backends/atoms_opensearch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.flake8 b/.flake8 index 7db48f62..94f58d25 100644 --- a/.flake8 +++ b/.flake8 @@ -2,7 +2,7 @@ select = B,BLK,C,E,F,I,S,W max-complexity = 30 max-line-length = 88 -ignore = E203,W503 +ignore = E203,W503 # ignore conflicts with black application-import-names = abcd,tests import-order-style = google exclude = diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 78af1b08..1c1f0c9c 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -886,7 +886,7 @@ def rename_property( query = self.parser(query) logger.info("rename: query=%s, old=%s, new=%s", query, name, new_name) - script_txt = f"if (!ctx._source.containsKey(params.new_name)) {{ " + script_txt = "if (!ctx._source.containsKey(params.new_name)) {{ " script_txt += ( f"ctx._source.{new_name} = ctx._source.{name};" " 
ctx._source.remove(params.name);" From 2668db9768f58957443575158d347fb5c4123251 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 08:26:50 +0100 Subject: [PATCH 071/112] Tidy README formatting --- README.md | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index e1fb3d3c..938a3413 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,13 @@ Main features: creating tables and views -``` +```sh $ pip install git+https://github.com/libAtoms/abcd.git ``` Example Docker installation on Ubuntu: -``` + +```sh sudo apt-get update sudo apt upgrade sudo apt install docker.io @@ -37,13 +38,13 @@ newgrp docker # or exit and log in Docker can be tested by running: -``` +```sh docker run hello-world ``` Example Python setup on Ubuntu (pip must be updated for poetry to be used successfully): -``` +```sh sudo apt install software-properties-common sudo add-apt-repository ppa:deadsnakes/ppa sudo apt install python3.10 @@ -56,7 +57,7 @@ curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 Building and installing ABCD dependencies via poetry: -``` +```sh git clone https://github.com/libAtoms/abcd.git curl -sSL https://install.python-poetry.org | python3 - export PATH="/home/ubuntu/.local/bin:$PATH" @@ -69,33 +70,34 @@ poetry build If you have an already running MongoDB server, or install your own, then you are ready to go. Alternatively, -``` +```sh docker run -d --rm --name abcd-mongodb -v :/data/db -p 27017:27017 mongo ``` will download and install a docker and run a database in it. 
To connect to a mongodb that is already running, use -``` + +```sh abcd login mongodb://localhost ``` If you are running `abcd` inside a docker, and want to connect to a mongodb outside that docker use something like this (example is for Mac OS): -``` +```sh abcd login mongodb://docker.for.mac.localhost ``` The above login command will place create an `~/.abcd` file with the following contents: -``` +```sh {"url": "mongodb://localhost"} ``` ### OpenSearch If you have an already running OpenSearch server, or install your own, then you are ready to go. Alternatively, -``` +```sh sudo swapoff -a # optional sudo sysctl -w vm.swappiness=1 # optional sudo sysctl -w fs.file-max=262144 # optional @@ -105,24 +107,27 @@ docker run -d --rm --name abcd-opensearch -v your@email ``` Then you'll be able to access the database remotely using, e.g. -``` + +```sh ssh abcd@your.machine summary ``` @@ -135,17 +140,17 @@ The database has a simple GUI, coupled with a visualiser. Data for now needs to Currently a manual uploaded image is available, that was built on 7/2/2020 by Tamas K. Stenczel. To access it: 1. pull the image - ``` + ```sh docker pull stenczelt/projection-abcd:latest ``` -2. create a docker network, which enables the containers to communicate with each other and the outside world as well - ``` +2. create a docker network, which enables the containers to communicate with each other and the outside world as well + ```sh docker network create --driver bridge abcd-network ``` 3. run the mongo (ABCD) and the visualiser as well - ``` + ```sh docker run -d --rm --name abcd-mongodb-net -v :/data/db -p 27017:27017 --network abcd-network mongo docker run -it --rm --name visualiser-dev -p 9999:9999 --network abcd-network stenczelt/projection-abcd @@ -156,7 +161,8 @@ To access it: This will start the visualiser with ABCD integration! Have fun! 
After usage, for cleanup: -``` + +```sh docker stop visualiser-dev abcd-mongodb-net # stop the containers docker rm visualiser-dev abcd-mongodb-net # remove them if --rm did not docker network rm abcd-network # remove the docker network @@ -166,6 +172,6 @@ docker network rm abcd-network # remove the docker network Unit tests are automatically run on push and creation of pull requests. Unit testing using mock databases can also be run in the command line using: -``` +```sh python -m unittest tests ``` From facc917d01d35d8114e46ca9e0fdd8a9e3db226d Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 08:46:50 +0100 Subject: [PATCH 072/112] Tidy optional type hints --- abcd/backends/atoms_opensearch.py | 88 +++++++++++++++---------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 1c1f0c9c..498a4e39 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -2,7 +2,7 @@ from collections.abc import Generator from datetime import datetime -from typing import Union, Iterable +from typing import Iterable, Optional, Union import logging from os import linesep from pathlib import Path @@ -108,29 +108,29 @@ class AtomsModel(AbstractModel): Attributes ---------- - _client: Union[OpenSearch, None] + _client: Optional[OpenSearch] OpenSearch client. - _index_name: Union[str, None] + _index_name: Optional[str] OpenSearch index name. """ def __init__( self, - client: Union[OpenSearch, None] = None, - index_name: Union[str, None] = None, - dict: Union[dict, None] = None, + client: Optional[OpenSearch] = None, + index_name: Optional[str] = None, + dict: Optional[dict] = None, ): """ Initialises class. Parameters ---------- - client: Union[OpenSearch, None] - OpenSearch client. - index_name: Union[str, None] - OpenSearch index name. - dict: dict - Dictionary of atoms data. 
+ client: Optional[OpenSearch] + OpenSearch client. Default is `None`. + index_name: Optional[str] + OpenSearch index name. Default is `None`. + dict: Optional[dict] + Dictionary of atoms data. Default is `None`. """ super().__init__(dict) @@ -143,7 +143,7 @@ def from_atoms( client: OpenSearch, index_name: str, atoms: Atoms, - extra_info: Union[dict, None] = None, + extra_info: Optional[dict] = None, store_calc: bool = True, ) -> AtomsModel: """ @@ -157,7 +157,7 @@ def from_atoms( OpenSearch index name. atoms: Atoms Atoms data to be stored. - extra_info: Union[dict, None], optional + extra_info: Optional[dict] Extra information to store in the document with the atoms data. Default is `None`. store_calc: bool, optional @@ -316,13 +316,13 @@ def info(self): "type": "opensearch", } - def delete(self, query: Union[dict, str, None] = None): + def delete(self, query: Optional[dict, str] = None): """ Deletes documents from the database. Parameters ---------- - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to be deleted. Default is `None`. """ query = self.parser(query) @@ -375,7 +375,7 @@ def save_bulk(self, actions: Iterable, **kwargs): def push( self, atoms: Union[Atoms, Iterable], - extra_info: Union[dict, str, list, None] = None, + extra_info: Optional[dict, str, list] = None, store_calc: bool = True, **kwargs, ): @@ -385,7 +385,7 @@ def push( Parameters ---------- atoms: Union[Atoms, Iterable] - extra_info: Union[dict, str, None], optional + extra_info: Optional[dict, str], optional Extra information to store in the document with the atoms data. Default is `None`. 
store_calc: bool, optional @@ -430,7 +430,7 @@ def push( def upload( self, file: Path, - extra_infos: Union[Iterable, dict, None] = None, + extra_infos: Optional[Iterable, dict] = None, store_calc: bool = True, ): """ @@ -440,7 +440,7 @@ def upload( ---------- file: Path Path to file to be uploaded - extra_infos: Union[Iterable, dict, None], optional + extra_infos: Optional[Iterable, dict] Extra information to store in the document with the atoms data. Default is `None`. store_calc: bool, optional @@ -462,14 +462,14 @@ def upload( self.push(data, extra_info, store_calc=store_calc) def get_items( - self, query: Union[dict, str, None] = None + self, query: Optional[dict, str] = None ) -> Generator[dict, None, None]: """ Get data as a dictionary from documents in the database. Parameters ---------- - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to get data from. Default is `None`. Returns @@ -489,14 +489,14 @@ def get_items( yield {"_id": hit["_id"], **hit["_source"]} def get_atoms( - self, query: Union[dict, str, None] = None + self, query: Optional[dict, str] = None ) -> Generator[Atoms, None, None]: """ Get data as Atoms object from documents in the database. Parameters ---------- - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to get data from. Default is `None`. Returns @@ -515,13 +515,13 @@ def get_atoms( ): yield AtomsModel(None, None, hit["_source"]).to_ase() - def count(self, query: Union[dict, str, None] = None, timeout=30.0) -> int: + def count(self, query: Optional[dict, str] = None, timeout=30.0) -> int: """ Counts number of documents in the database. Parameters ---------- - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to be counted. Default is `None`. timeout: float Timeout for request in seconds. 
@@ -543,7 +543,7 @@ def count(self, query: Union[dict, str, None] = None, timeout=30.0) -> int: def _get_props_from_source( self, names: Union[str, list[str]], - query: Union[dict, str, None] = None, + query: Optional[dict, str] = None, ) -> dict: """ Gets all values of specified properties using the original data from _source. @@ -552,7 +552,7 @@ def _get_props_from_source( ---------- names: Union[str, list[str]] Name or list of names of properties to return. - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to get properties from. Default is `None`. Returns @@ -580,7 +580,7 @@ def property( self, names: Union[str, list[str]], allow_flatten: bool = True, - query: Union[dict, str, None] = None, + query: Optional[dict, str] = None, ) -> Union[dict, list]: """ Gets all values of specified properties for matching documents in the database. @@ -592,7 +592,7 @@ def property( allow_flatten: bool = True Whether to allow arrays to be returned flattened. There is no guarantee for the order of returned values. Default is `True`. - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to get properties from. Default is `None`. Returns @@ -644,7 +644,7 @@ def property( return props[names[0]] return props - def count_property(self, name, query: Union[dict, str, None] = None) -> dict: + def count_property(self, name, query: Optional[dict, str] = None) -> dict: """ Counts values of a specified property for matching documents in the database. This method much faster than performing a Count on the list @@ -652,7 +652,7 @@ def count_property(self, name, query: Union[dict, str, None] = None) -> dict: Parameters ---------- - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to count properties from. Default is `None`. 
Returns @@ -684,14 +684,14 @@ def count_property(self, name, query: Union[dict, str, None] = None) -> dict: return prop - def properties(self, query: Union[dict, str, None] = None) -> dict: + def properties(self, query: Optional[dict, str] = None) -> dict: """ Gets lists of all properties from matching documents, separated into info, derived, and array properties. Parameters ---------- - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to get properties from. Default is `None`. Returns @@ -777,13 +777,13 @@ def get_type_of_property(self, prop: str, category: str) -> str: return "vector({})".format(map_types[type(data[0])]) return "scalar({})".format(map_types[type(data)]) - def count_properties(self, query: Union[dict, str, None] = None) -> dict: + def count_properties(self, query: Optional[dict, str] = None) -> dict: """ Counts all properties from matching documents. Parameters ---------- - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to count properties from. Default is `None`. Returns @@ -836,7 +836,7 @@ def count_properties(self, query: Union[dict, str, None] = None) -> dict: return properties - def add_property(self, data: dict, query: Union[dict, str, None] = None): + def add_property(self, data: dict, query: Optional[dict, str] = None): """ Adds properties to matching documents. @@ -844,7 +844,7 @@ def add_property(self, data: dict, query: Union[dict, str, None] = None): ---------- data: dict Property key-value pairs to be added to matching documents. - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to add properties to. Default is `None`. 
""" query = self.parser(query) @@ -869,7 +869,7 @@ def add_property(self, data: dict, query: Union[dict, str, None] = None): ) def rename_property( - self, name: str, new_name: str, query: Union[dict, str, None] = None + self, name: str, new_name: str, query: Optional[dict, str] = None ): """ Renames property for all matching documents. @@ -880,7 +880,7 @@ def rename_property( Current name of property to be renamed. new_name: str New name of property to be renamed. - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to rename property. Default is `None`. """ query = self.parser(query) @@ -906,7 +906,7 @@ def rename_property( self.client.update_by_query(index=self.index_name, body=body) - def delete_property(self, name: str, query: Union[dict, str, None] = None): + def delete_property(self, name: str, query: Optional[dict, str] = None): """ Deletes property from all matching documents. @@ -914,7 +914,7 @@ def delete_property(self, name: str, query: Union[dict, str, None] = None): ---------- name: str Name of property to be deleted from documents. - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents to have property deleted. Default is `None`. """ query = self.parser(query) @@ -940,7 +940,7 @@ def delete_property(self, name: str, query: Union[dict, str, None] = None): self.client.update_by_query(index=self.index_name, body=body) def hist( - self, name: str, query: Union[dict, str, None] = None, **kwargs + self, name: str, query: Optional[dict, str] = None, **kwargs ) -> Union[dict, None]: """ Calculate histogram statistics for a property from all matching documents. @@ -949,7 +949,7 @@ def hist( ---------- name: str Name of property. - query: Union[dict, str, None] + query: Optional[dict, str] Query to filter documents. Default is `None`. 
Returns From 40ed1c9f6e5cffe237c6ff860bea7addeb6fff11 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 08:50:25 +0100 Subject: [PATCH 073/112] Add return type hint --- abcd/backends/atoms_opensearch.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 498a4e39..e1534c24 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -174,13 +174,14 @@ def from_atoms( return obj @property - def _id(self): + def _id(self) -> Union[str, None]: """ - Gets the OpenSearch document ID stored in data. + Get the OpenSearch document ID stored in data. Returns ------- - Current document ID. + Union[str, None] + Current document ID. """ return self.get("_id", None) From d8c980c3c29ee547c538a4848ef5b1c163a6a88d Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:13:42 +0200 Subject: [PATCH 074/112] Apply suggestions from code review Co-authored-by: Jacob Wilkins <46597752+oerc0122@users.noreply.github.com> --- abcd/frontends/commandline/commands.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index 1bf9ebed..d3c57cb1 100644 --- a/abcd/frontends/commandline/commands.py +++ b/abcd/frontends/commandline/commands.py @@ -19,8 +19,7 @@ def login(*, config, name, url, disable_ssl=False, **kwargs): db = ABCD.from_url(url=url, use_ssl=(not disable_ssl)) info = db.info() - config["url"] = url - config["use_ssl"] = not disable_ssl + config.update(url=url, use_ssl=not disable_ssl) config.save() print("Successfully connected to the database!") @@ -181,21 +180,21 @@ def key_add(*, db, query, keys, **kwargs): if query: if isinstance(db, OpenSearchDatabase): test = [ - f"{query} AND ({' OR '.join([f'{key}:*' for key in 
data.keys()])})" + f"{query} AND ({' OR '.join(f'{key}:*' for key in data)})" for query in query ] else: test = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) else: if isinstance(db, OpenSearchDatabase): - test = " OR ".join([f"{key}:*" for key in data.keys()]) + test = " OR ".join(f"{key}:*" for key in data) else: test = ("OR", *(("NAME", key) for key in data.keys())) if db.count(query=test): print( - "The new key already exist for the given query! " - "Please make sure that the target key name don't exist" + "The new key already exists for the given query! " + "Please make sure that the target key name doesn't exist" ) exit(1) @@ -227,7 +226,7 @@ def key_delete(*, db, query, yes, keys, **kwargs): if isinstance(db, OpenSearchDatabase): query = [ - f"{query} AND ({' OR '.join([f'{key}:*' for key in data.keys()])})" + f"{query} AND ({' OR '.join(f'{key}:*' for key in data)})" for query in query ] else: From c3b6d9cfa9d2af8292a8ec383480dc6efbc83482 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:24:35 +0200 Subject: [PATCH 075/112] Update abcd/backends/atoms_opensearch.py Co-authored-by: Jacob Wilkins <46597752+oerc0122@users.noreply.github.com> --- abcd/backends/atoms_opensearch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index e1534c24..7b0f0093 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -386,7 +386,7 @@ def push( Parameters ---------- atoms: Union[Atoms, Iterable] - extra_info: Optional[dict, str], optional + extra_info: Optional[Union[dict, str]], optional Extra information to store in the document with the atoms data. Default is `None`. 
store_calc: bool, optional From c6b9f431d21c27bb92674e21cc056c7f695a9b25 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:28:37 +0100 Subject: [PATCH 076/112] Fix type Optional Union type hints --- abcd/backends/atoms_opensearch.py | 60 +++++++++++++++---------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 7b0f0093..6e675078 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -317,13 +317,13 @@ def info(self): "type": "opensearch", } - def delete(self, query: Optional[dict, str] = None): + def delete(self, query: Optional[Union[dict, str]] = None): """ Deletes documents from the database. Parameters ---------- - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to be deleted. Default is `None`. """ query = self.parser(query) @@ -376,7 +376,7 @@ def save_bulk(self, actions: Iterable, **kwargs): def push( self, atoms: Union[Atoms, Iterable], - extra_info: Optional[dict, str, list] = None, + extra_info: Optional[Union[dict, str, list]] = None, store_calc: bool = True, **kwargs, ): @@ -386,7 +386,7 @@ def push( Parameters ---------- atoms: Union[Atoms, Iterable] - extra_info: Optional[Union[dict, str]], optional + extra_info: Optional[Union[dict, str, list]] Extra information to store in the document with the atoms data. Default is `None`. store_calc: bool, optional @@ -431,7 +431,7 @@ def push( def upload( self, file: Path, - extra_infos: Optional[Iterable, dict] = None, + extra_infos: Optional[Union[Iterable, dict]] = None, store_calc: bool = True, ): """ @@ -441,7 +441,7 @@ def upload( ---------- file: Path Path to file to be uploaded - extra_infos: Optional[Iterable, dict] + extra_infos: Optional[Union[Iterable, dict]] Extra information to store in the document with the atoms data. Default is `None`. 
store_calc: bool, optional @@ -463,14 +463,14 @@ def upload( self.push(data, extra_info, store_calc=store_calc) def get_items( - self, query: Optional[dict, str] = None + self, query: Optional[Union[dict, str]] = None ) -> Generator[dict, None, None]: """ Get data as a dictionary from documents in the database. Parameters ---------- - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to get data from. Default is `None`. Returns @@ -490,14 +490,14 @@ def get_items( yield {"_id": hit["_id"], **hit["_source"]} def get_atoms( - self, query: Optional[dict, str] = None + self, query: Optional[Union[dict, str]] = None ) -> Generator[Atoms, None, None]: """ Get data as Atoms object from documents in the database. Parameters ---------- - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to get data from. Default is `None`. Returns @@ -516,13 +516,13 @@ def get_atoms( ): yield AtomsModel(None, None, hit["_source"]).to_ase() - def count(self, query: Optional[dict, str] = None, timeout=30.0) -> int: + def count(self, query: Optional[Union[dict, str]] = None, timeout=30.0) -> int: """ Counts number of documents in the database. Parameters ---------- - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to be counted. Default is `None`. timeout: float Timeout for request in seconds. @@ -544,7 +544,7 @@ def count(self, query: Optional[dict, str] = None, timeout=30.0) -> int: def _get_props_from_source( self, names: Union[str, list[str]], - query: Optional[dict, str] = None, + query: Optional[Union[dict, str]] = None, ) -> dict: """ Gets all values of specified properties using the original data from _source. @@ -553,7 +553,7 @@ def _get_props_from_source( ---------- names: Union[str, list[str]] Name or list of names of properties to return. - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to get properties from. 
Default is `None`. Returns @@ -581,7 +581,7 @@ def property( self, names: Union[str, list[str]], allow_flatten: bool = True, - query: Optional[dict, str] = None, + query: Optional[Union[dict, str]] = None, ) -> Union[dict, list]: """ Gets all values of specified properties for matching documents in the database. @@ -593,7 +593,7 @@ def property( allow_flatten: bool = True Whether to allow arrays to be returned flattened. There is no guarantee for the order of returned values. Default is `True`. - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to get properties from. Default is `None`. Returns @@ -645,7 +645,7 @@ def property( return props[names[0]] return props - def count_property(self, name, query: Optional[dict, str] = None) -> dict: + def count_property(self, name, query: Optional[Union[dict, str]] = None) -> dict: """ Counts values of a specified property for matching documents in the database. This method much faster than performing a Count on the list @@ -653,7 +653,7 @@ def count_property(self, name, query: Optional[dict, str] = None) -> dict: Parameters ---------- - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to count properties from. Default is `None`. Returns @@ -685,14 +685,14 @@ def count_property(self, name, query: Optional[dict, str] = None) -> dict: return prop - def properties(self, query: Optional[dict, str] = None) -> dict: + def properties(self, query: Optional[Union[dict, str]] = None) -> dict: """ Gets lists of all properties from matching documents, separated into info, derived, and array properties. Parameters ---------- - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to get properties from. Default is `None`. 
Returns @@ -778,13 +778,13 @@ def get_type_of_property(self, prop: str, category: str) -> str: return "vector({})".format(map_types[type(data[0])]) return "scalar({})".format(map_types[type(data)]) - def count_properties(self, query: Optional[dict, str] = None) -> dict: + def count_properties(self, query: Optional[Union[dict, str]] = None) -> dict: """ Counts all properties from matching documents. Parameters ---------- - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to count properties from. Default is `None`. Returns @@ -837,7 +837,7 @@ def count_properties(self, query: Optional[dict, str] = None) -> dict: return properties - def add_property(self, data: dict, query: Optional[dict, str] = None): + def add_property(self, data: dict, query: Optional[Union[dict, str]] = None): """ Adds properties to matching documents. @@ -845,7 +845,7 @@ def add_property(self, data: dict, query: Optional[dict, str] = None): ---------- data: dict Property key-value pairs to be added to matching documents. - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to add properties to. Default is `None`. """ query = self.parser(query) @@ -870,7 +870,7 @@ def add_property(self, data: dict, query: Optional[dict, str] = None): ) def rename_property( - self, name: str, new_name: str, query: Optional[dict, str] = None + self, name: str, new_name: str, query: Optional[Union[dict, str]] = None ): """ Renames property for all matching documents. @@ -881,7 +881,7 @@ def rename_property( Current name of property to be renamed. new_name: str New name of property to be renamed. - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to rename property. Default is `None`. 
""" query = self.parser(query) @@ -907,7 +907,7 @@ def rename_property( self.client.update_by_query(index=self.index_name, body=body) - def delete_property(self, name: str, query: Optional[dict, str] = None): + def delete_property(self, name: str, query: Optional[Union[dict, str]] = None): """ Deletes property from all matching documents. @@ -915,7 +915,7 @@ def delete_property(self, name: str, query: Optional[dict, str] = None): ---------- name: str Name of property to be deleted from documents. - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents to have property deleted. Default is `None`. """ query = self.parser(query) @@ -941,7 +941,7 @@ def delete_property(self, name: str, query: Optional[dict, str] = None): self.client.update_by_query(index=self.index_name, body=body) def hist( - self, name: str, query: Optional[dict, str] = None, **kwargs + self, name: str, query: Optional[Union[dict, str]] = None, **kwargs ) -> Union[dict, None]: """ Calculate histogram statistics for a property from all matching documents. @@ -950,7 +950,7 @@ def hist( ---------- name: str Name of property. - query: Optional[dict, str] + query: Optional[Union[dict, str]] Query to filter documents. Default is `None`. 
Returns From 8befe61f196f86721f1265ad0d18cbe156797257 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:40:45 +0100 Subject: [PATCH 077/112] Fix connection type --- abcd/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/abcd/__init__.py b/abcd/__init__.py index 8fe111f7..da922709 100644 --- a/abcd/__init__.py +++ b/abcd/__init__.py @@ -24,7 +24,7 @@ def from_url(cls, url, **kwargs): r = parse.urlparse(url) logger.info(r) - if r.scheme == "mongodb": + if ConnectionType[r.scheme] is ConnectionType.mongodb: conn_settings = { "host": r.hostname, "port": r.port, @@ -45,8 +45,8 @@ def from_url(cls, url, **kwargs): from abcd.backends.atoms_pymongo import MongoDatabase return MongoDatabase(db_name=db, host=r.geturl(), uri_mode=True, **kwargs) - r.scheme = ConnectionType[r.scheme] - if r.scheme is ConnectionType.opensearch: + + if ConnectionType[r.scheme] is ConnectionType.opensearch: conn_settings = { "host": r.hostname, "port": r.port, From f96cbb429bd9486788acf52167ec24cf2356f7e9 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:47:35 +0200 Subject: [PATCH 078/112] Apply suggestions from code review Co-authored-by: Jacob Wilkins <46597752+oerc0122@users.noreply.github.com> --- abcd/backends/atoms_opensearch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 6e675078..36fcc996 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -174,13 +174,13 @@ def from_atoms( return obj @property - def _id(self) -> Union[str, None]: + def _id(self) -> Optional[str]: """ Get the OpenSearch document ID stored in data. Returns ------- - Union[str, None] + Optional[str] Current document ID. 
""" return self.get("_id", None) From 294a2f9577a825c7e545b53c8e87110e188304c6 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:26:18 +0100 Subject: [PATCH 079/112] Fix renaming keys --- abcd/backends/atoms_opensearch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 36fcc996..eb51b91e 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -887,7 +887,7 @@ def rename_property( query = self.parser(query) logger.info("rename: query=%s, old=%s, new=%s", query, name, new_name) - script_txt = "if (!ctx._source.containsKey(params.new_name)) {{ " + script_txt = "if (!ctx._source.containsKey(params.new_name)) { " script_txt += ( f"ctx._source.{new_name} = ctx._source.{name};" " ctx._source.remove(params.name);" From d462dd564ccb0b771c1e973334ae190b78750c0c Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 14:01:06 +0100 Subject: [PATCH 080/112] Tidy setting db --- abcd/__init__.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/abcd/__init__.py b/abcd/__init__.py index da922709..a4f7d456 100644 --- a/abcd/__init__.py +++ b/abcd/__init__.py @@ -24,6 +24,9 @@ def from_url(cls, url, **kwargs): r = parse.urlparse(url) logger.info(r) + db = r.path.split("/")[1] if r.path else None + db = db if db else "abcd" + if ConnectionType[r.scheme] is ConnectionType.mongodb: conn_settings = { "host": r.hostname, @@ -33,9 +36,6 @@ def from_url(cls, url, **kwargs): "authSource": "admin", } - db = r.path.split("/")[1] if r.path else None - db = db if db else "abcd" - from abcd.backends.atoms_pymongo import MongoDatabase return MongoDatabase(db_name=db, **conn_settings, **kwargs) @@ -54,9 +54,6 @@ def from_url(cls, url, **kwargs): "password": r.password, } - db = r.path.split("/")[1] if r.path else 
None - db = db if db else "abcd" - from abcd.backends.atoms_opensearch import OpenSearchDatabase return OpenSearchDatabase(db=db, **conn_settings, **kwargs) From 70e024461b24e1ab20a5ba69546db646bc3e1a93 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 14:05:18 +0100 Subject: [PATCH 081/112] Tidy code --- abcd/frontends/commandline/commands.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index d3c57cb1..540d238f 100644 --- a/abcd/frontends/commandline/commands.py +++ b/abcd/frontends/commandline/commands.py @@ -184,12 +184,11 @@ def key_add(*, db, query, keys, **kwargs): for query in query ] else: - test = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) + test = ("AND", query, ("OR", *(("NAME", key) for key in data))) + elif isinstance(db, OpenSearchDatabase): + test = " OR ".join(f"{key}:*" for key in data) else: - if isinstance(db, OpenSearchDatabase): - test = " OR ".join(f"{key}:*" for key in data) - else: - test = ("OR", *(("NAME", key) for key in data.keys())) + test = ("OR", *(("NAME", key) for key in data)) if db.count(query=test): print( @@ -230,7 +229,7 @@ def key_delete(*, db, query, yes, keys, **kwargs): for query in query ] else: - query = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) + query = ("AND", query, ("OR", *(("NAME", key) for key in data))) if not yes: print( @@ -239,7 +238,7 @@ def key_delete(*, db, query, yes, keys, **kwargs): ) exit(1) - for k in data.keys(): + for k in data: db.delete_property(k, query=query) From a67c5a4fd01a65c3d7f0e8cd96aee2545a0c4715 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 14:10:51 +0100 Subject: [PATCH 082/112] Tidy logs --- abcd/frontends/commandline/commands.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 
deletions(-) diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index 540d238f..deebba9b 100644 --- a/abcd/frontends/commandline/commands.py +++ b/abcd/frontends/commandline/commands.py @@ -12,7 +12,7 @@ @init_config def login(*, config, name, url, disable_ssl=False, **kwargs): logger.info( - f"login args: \nconfig:{config}, name:{name}, url:{url}, kwargs:{kwargs}" + "login args: \nconfig:%s, name:%s, url:%s, kwargs:%s", config, name, url, kwargs ) from abcd import ABCD @@ -35,7 +35,7 @@ def login(*, config, name, url, disable_ssl=False, **kwargs): @init_config @init_db def download(*, db, query, fileformat, filename, **kwargs): - logger.info(f"download\n kwargs: {kwargs}") + logger.info("download\n kwargs: %s", kwargs) from ase.io import write @@ -50,7 +50,7 @@ def download(*, db, query, fileformat, filename, **kwargs): @init_db @check_remote def delete(*, db, query, yes, **kwargs): - logger.info(f"delete\n kwargs: {kwargs}") + logger.info("delete\n kwargs: %s", kwargs) if not yes: print(f"Please use --yes for deleting {db.count(query=query)} configurations") @@ -74,10 +74,10 @@ def upload(*, db, path, extra_infos, ignore_calc_results, **kwargs): elif path.is_dir(): for file in path.glob(".xyz"): - logger.info(f"Uploaded file: {file}") + logger.info("Uploaded file: %s", file) db.upload(file, extra_infos, store_calc=calculator) else: - logger.info(f"No file found: {path}") + logger.info("No file found: %s", path) raise FileNotFoundError() else: @@ -87,8 +87,8 @@ def upload(*, db, path, extra_infos, ignore_calc_results, **kwargs): @init_config @init_db def summary(*, db, query, print_all, bins, truncate, props, **kwargs): - logger.info(f"summary\n kwargs: {kwargs}") - logger.info(f"query: {query}") + logger.info("summary\n kwargs: %s", kwargs) + logger.info("query: %s", query) if print_all: truncate = None @@ -152,8 +152,8 @@ def summary(*, db, query, print_all, bins, truncate, props, **kwargs): @init_config @init_db def 
show(*, db, query, print_all, props, **kwargs): - logger.info(f"show\n kwargs: {kwargs}") - logger.info(f"query: {query}") + logger.info("show\n kwargs: %s", kwargs) + logger.info("query: %s", query) if not props: print("Please define at least on property by using the -p option!") @@ -262,7 +262,9 @@ def server(*, abcd_url, url, api_only, **kwargs): from abcd.server.app import create_app - logger.info(f"SERVER - abcd: {abcd_url}, url: {url}, api_only:{api_only}") + logger.info( + "SERVER - abcd: %s, url: %s, api_only: %s", abcd_url, url, api_only + ) if api_only: print("Not implemented yet!") From 08d116a000645284732e5e012522e6ef6f02e9b2 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Tue, 11 Jun 2024 14:25:31 +0100 Subject: [PATCH 083/112] Tidy code --- abcd/backends/atoms_opensearch.py | 60 ++++++++++++++------------ abcd/backends/atoms_properties.py | 7 +-- abcd/backends/utils.py | 2 +- abcd/frontends/commandline/commands.py | 4 +- 4 files changed, 36 insertions(+), 37 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index eb51b91e..c535bd37 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -1,6 +1,6 @@ from __future__ import annotations -from collections.abc import Generator +from collections.abc import Iterator from datetime import datetime from typing import Iterable, Optional, Union import logging @@ -40,20 +40,20 @@ class OpenSearchQuery(AbstractQuerySet): """Class to parse and build queries for OpenSearch.""" - def __call__(self, query: Union[dict, str, list, None]) -> Union[dict, None]: + def __call__(self, query: Optional[Union[dict, str, list]]) -> Optional[dict]: """ Parses and builds queries for OpenSearch. Parameters ---------- - query: Union[dict, str, list, None] + query: Optional[Union[dict, str, list]] Query to be parsed for OpenSearch. If passed as a dictionary, the query is left unchanged. 
If passed a string or list, the query is treated as a query string, based on Lucene query syntax. Returns ------- - Union[dict, None] + Optional[dict] The parsed query for OpenSearch. """ if not query: @@ -327,6 +327,7 @@ def delete(self, query: Optional[Union[dict, str]] = None): Query to filter documents to be deleted. Default is `None`. """ query = self.parser(query) + logger.info("parsed query: %s", query) body = {"query": query} self.client.delete_by_query( @@ -354,7 +355,7 @@ def refresh(self): """ self.client.indices.refresh(index=self.index_name) - def save_bulk(self, actions: Iterable, **kwargs): + def save_bulk(self, actions: Iterable[dict], **kwargs): """ Save a collection of documents in bulk. @@ -410,7 +411,7 @@ def push( ) data.save() - elif isinstance(atoms, Generator) or isinstance(atoms, list): + elif isinstance(atoms, Iterator) or isinstance(atoms, list): actions = [] for i, item in enumerate(atoms): if isinstance(extra_info, list): @@ -431,7 +432,7 @@ def push( def upload( self, file: Path, - extra_infos: Optional[Union[Iterable, dict]] = None, + extra_infos: Union[Iterable, dict] = (), store_calc: bool = True, ): """ @@ -441,9 +442,9 @@ def upload( ---------- file: Path Path to file to be uploaded - extra_infos: Optional[Union[Iterable, dict]] + extra_infos: Union[Iterable, dict] Extra information to store in the document with the atoms data. - Default is `None`. + Default is `()`. store_calc: bool, optional Whether to store data from the calculator attached to atoms. Default is `True`. 
@@ -452,19 +453,14 @@ def upload( if isinstance(file, str): file = Path(file) - extra_info = {} - if extra_infos: - for info in extra_infos: - extra_info.update(extras.parser.parse(info)) # type: ignore + extra_info = dict(map(extras.parser.parse, extra_infos)) extra_info["filename"] = str(file) data = iread(str(file)) self.push(data, extra_info, store_calc=store_calc) - def get_items( - self, query: Optional[Union[dict, str]] = None - ) -> Generator[dict, None, None]: + def get_items(self, query: Optional[Union[dict, str]] = None) -> Iterator[dict]: """ Get data as a dictionary from documents in the database. @@ -475,9 +471,11 @@ def get_items( Returns ------- - Generator for dictionary of data. + Iterator[dict] + Iterator for dictionary of data. """ query = self.parser(query) + logger.info("parsed query: %s", query) query = { "query": query, } @@ -489,9 +487,7 @@ def get_items( ): yield {"_id": hit["_id"], **hit["_source"]} - def get_atoms( - self, query: Optional[Union[dict, str]] = None - ) -> Generator[Atoms, None, None]: + def get_atoms(self, query: Optional[Union[dict, str]] = None) -> Iterator[Atoms]: """ Get data as Atoms object from documents in the database. @@ -502,9 +498,11 @@ def get_atoms( Returns ------- - Generator for AtomsModel object of data. + Iterator[Atoms] + Generator for AtomsModel object of data. """ query = self.parser(query) + logger.info("parsed query: %s", query) query = { "query": query, } @@ -514,7 +512,7 @@ def get_atoms( index=self.index_name, query=query, ): - yield AtomsModel(None, None, hit["_source"]).to_ase() + yield AtomsModel(dict=hit["_source"]).to_ase() def count(self, query: Optional[Union[dict, str]] = None, timeout=30.0) -> int: """ @@ -603,6 +601,7 @@ def property( if only one property is given. """ query = self.parser(query) + logger.info("parsed query: %s", query) query = { "query": query, } @@ -662,6 +661,7 @@ def count_property(self, name, query: Optional[Union[dict, str]] = None) -> dict matching documents. 
""" query = self.parser(query) + logger.info("parsed query: %s", query) body = { "size": 0, @@ -678,9 +678,9 @@ def count_property(self, name, query: Optional[Union[dict, str]] = None) -> dict prop = {} - for val in self.client.search( - index=self.index_name, body=body - )["aggregations"][format(name)]["buckets"]: + for val in self.client.search(index=self.index_name, body=body)["aggregations"][ + format(name) + ]["buckets"]: prop[val["key"]] = val["doc_count"] return prop @@ -702,6 +702,7 @@ def properties(self, query: Optional[Union[dict, str]] = None) -> dict: the properties of that type. """ query = self.parser(query) + logger.info("parsed query: %s", query) properties = {} @@ -793,6 +794,7 @@ def count_properties(self, query: Optional[Union[dict, str]] = None) -> dict: corresponding to their counts, categories and data types. """ query = self.parser(query) + logger.info("parsed query: %s", query) properties = {} try: @@ -942,7 +944,7 @@ def delete_property(self, name: str, query: Optional[Union[dict, str]] = None): def hist( self, name: str, query: Optional[Union[dict, str]] = None, **kwargs - ) -> Union[dict, None]: + ) -> Optional[dict]: """ Calculate histogram statistics for a property from all matching documents. @@ -955,10 +957,12 @@ def hist( Returns ------- - Dictionary containing histogram statistics, including the number of - bins, edges, counts, min, max, and standard deviation. + Optional[dict] + Dictionary containing histogram statistics, including the number of + bins, edges, counts, min, max, and standard deviation. 
""" query = self.parser(query) + logger.info("parsed query: %s", query) data = self.property(name, query) return utils.histogram(name, data, **kwargs) diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py index 5ce1ccb2..7372e797 100644 --- a/abcd/backends/atoms_properties.py +++ b/abcd/backends/atoms_properties.py @@ -121,7 +121,7 @@ def __init__( "`struct_name_label` must be specified if store_struct_file is" " True." ) - self.struct_name_label = struct_name_label + self.struct_name_label = struct_name_label self.set_struct_files() def _separate_units(self): @@ -175,15 +175,12 @@ def get_struct_file(self, struct_name: str) -> str: ------- Filename for the current structure. """ - if struct_name is None: - raise ValueError("`struct_name` must be specified") if "{struct_name}" not in self.struct_file_template: raise ValueError( "'struct_name' must be a variable in the template file: " f"{self.struct_file_template}" ) - else: - return eval(f"f'{self.struct_file_template}'") + return eval(f"f'{self.struct_file_template}'") def to_list(self) -> list[dict]: """ diff --git a/abcd/backends/utils.py b/abcd/backends/utils.py index dd6f18aa..e55471eb 100644 --- a/abcd/backends/utils.py +++ b/abcd/backends/utils.py @@ -12,7 +12,7 @@ def histogram(name, data, **kwargs): if not data: return None - if data and isinstance(data, list): + if isinstance(data, list): ptype = type(data[0]) if not all(isinstance(x, ptype) for x in data): diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index deebba9b..87c02e9b 100644 --- a/abcd/frontends/commandline/commands.py +++ b/abcd/frontends/commandline/commands.py @@ -262,9 +262,7 @@ def server(*, abcd_url, url, api_only, **kwargs): from abcd.server.app import create_app - logger.info( - "SERVER - abcd: %s, url: %s, api_only: %s", abcd_url, url, api_only - ) + logger.info("SERVER - abcd: %s, url: %s, api_only: %s", abcd_url, url, api_only) if api_only: print("Not 
implemented yet!") From 98c383099c735c03a8b57b52c6964c046952e872 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 08:58:50 +0100 Subject: [PATCH 084/112] Fix extra info --- abcd/backends/atoms_opensearch.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index c535bd37..e4ef72bf 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -453,7 +453,9 @@ def upload( if isinstance(file, str): file = Path(file) - extra_info = dict(map(extras.parser.parse, extra_infos)) + extra_info = {} + for info in extra_infos: + extra_info.update(extras.parser.parse(info)) extra_info["filename"] = str(file) From d5c243ec684b3fa68bd4766b1b7c886113a4dc26 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 09:41:45 +0100 Subject: [PATCH 085/112] Tidy code --- tests/cli.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/cli.py b/tests/cli.py index f52a8ea1..42940cb3 100644 --- a/tests/cli.py +++ b/tests/cli.py @@ -1,8 +1,11 @@ +import logging import os +from pathlib import Path import subprocess -import unittest -import logging from time import sleep +import unittest + +DATA_PATH = Path(__file__).parent / "data" class CLI(unittest.TestCase): @@ -40,8 +43,7 @@ def test_summary(self): """ Test summary output of uploaded data file. """ - class_path = os.path.normpath(os.path.abspath(__file__)) - data_file = os.path.dirname(class_path) + "/data/example.xyz" + data_file = DATA_PATH / "example.xyz" subprocess.run( f"abcd upload {data_file} -i -e 'test_data'", shell=True, check=True @@ -58,9 +60,8 @@ def test_query(self): """ Test lucene-style query. 
""" - class_path = os.path.normpath(os.path.abspath(__file__)) - data_file_1 = os.path.dirname(class_path) + "/data/example.xyz" - data_file_2 = os.path.dirname(class_path) + "/data/example_2.xyz" + data_file_1 = DATA_PATH / "example.xyz" + data_file_2 = DATA_PATH / "example_2.xyz" subprocess.run( f"abcd upload {data_file_1} -i -e 'test_data'", shell=True, check=True @@ -92,9 +93,8 @@ def test_range_query(self): """ Test lucene-style ranged query. """ - class_path = os.path.normpath(os.path.abspath(__file__)) - data_file_1 = os.path.dirname(class_path) + "/data/example.xyz" - data_file_2 = os.path.dirname(class_path) + "/data/example_2.xyz" + data_file_1 = DATA_PATH / "example.xyz" + data_file_2 = DATA_PATH / "example_2.xyz" subprocess.run( f"abcd upload {data_file_1} -i -e 'test_data'", shell=True, check=True From b20b45488523a1f315f5f7b00da5c01f13803171 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 16:19:06 +0100 Subject: [PATCH 086/112] Update mongomock tests for pytest --- tests/{mongo_mock.py => test_mongomock.py} | 36 +++++++++------------- 1 file changed, 14 insertions(+), 22 deletions(-) rename tests/{mongo_mock.py => test_mongomock.py} (58%) diff --git a/tests/mongo_mock.py b/tests/test_mongomock.py similarity index 58% rename from tests/mongo_mock.py rename to tests/test_mongomock.py index 5d583fdc..5444fe59 100644 --- a/tests/mongo_mock.py +++ b/tests/test_mongomock.py @@ -5,29 +5,25 @@ from ase.io import read from ase.atoms import Atoms import mongomock +import pytest from abcd import ABCD -class MongoMock(unittest.TestCase): - @classmethod +class TestMongoMock: + @pytest.fixture(autouse=True) @mongomock.patch(servers=(("localhost", 27017),)) - def setUpClass(cls): + def abcd(self): logging.basicConfig(level=logging.INFO) url = "mongodb://localhost" - abcd = ABCD.from_url(url) - abcd.print_info() + mongo_abcd = ABCD.from_url(url) + mongo_abcd.print_info() + return mongo_abcd - 
cls.abcd = abcd + def test_info(self, abcd): + print(abcd.info()) - @classmethod - def tearDownClass(cls): - cls.abcd.destroy() - - def test_thing(self): - print(self.abcd.info()) - - def test_push(self): + def test_push(self, abcd): xyz = StringIO( """2 Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" @@ -40,13 +36,9 @@ def test_push(self): assert isinstance(atoms, Atoms) atoms.set_cell([1, 1, 1]) - self.abcd.destroy() - self.abcd.push(atoms) - new = list(self.abcd.get_atoms())[0] + abcd.destroy() + abcd.push(atoms) + new = list(abcd.get_atoms())[0] assert atoms == new - self.abcd.destroy() - - -if __name__ == "__main__": - unittest.main(verbosity=1, exit=False) + abcd.destroy() From 718a3bf0ec036bfda4e7040fee5f45a87cb3f49c Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 16:24:37 +0100 Subject: [PATCH 087/112] Update mock opensearch tests for pytest --- ...search_mock.py => test_opensearch_mock.py} | 109 ++++++++---------- 1 file changed, 45 insertions(+), 64 deletions(-) rename tests/{opensearch_mock.py => test_opensearch_mock.py} (56%) diff --git a/tests/opensearch_mock.py b/tests/test_opensearch_mock.py similarity index 56% rename from tests/opensearch_mock.py rename to tests/test_opensearch_mock.py index 6f0959c6..539ce5e2 100644 --- a/tests/opensearch_mock.py +++ b/tests/test_opensearch_mock.py @@ -1,75 +1,60 @@ -from importlib import reload from io import StringIO import logging import os -import unittest from ase.atoms import Atoms from ase.io import read from openmock import openmock +import pytest from abcd import ABCD -from abcd.backends import atoms_opensearch -from abcd.backends.atoms_opensearch import AtomsModel +from abcd.backends.atoms_opensearch import AtomsModel, OpenSearchDatabase -class OpenSearchMock(unittest.TestCase): - """ - Testing mock OpenSearch database functions. 
- """ +class TestOpenSearchMock: + """Testing mock OpenSearch database functions.""" - @classmethod + @pytest.fixture(autouse=True) @openmock - def setUpClass(cls): - """ - Set up database connection. - """ - reload(atoms_opensearch) - from abcd.backends.atoms_opensearch import OpenSearchDatabase + def abcd(self): + """Set up database connection.""" if "port" in os.environ: - cls.port = int(os.environ["port"]) + port = int(os.environ["port"]) else: - cls.port = 9200 - cls.host = "localhost" + port = 9200 + host = "localhost" logging.basicConfig(level=logging.INFO) - url = f"opensearch://admin:admin@{cls.host}:{cls.port}" - abcd = ABCD.from_url(url, index_name="test_index", analyse_schema=False) - assert isinstance(abcd, OpenSearchDatabase) - cls.abcd = abcd + url = f"opensearch://admin:admin@{host}:{port}" + opensearch_abcd = ABCD.from_url(url, index_name="test_index", analyse_schema=False) + assert isinstance(opensearch_abcd, OpenSearchDatabase) + return opensearch_abcd - @classmethod - def tearDownClass(cls): - """ - Delete index from database. - """ - cls.abcd.destroy() - - def test_destroy(self): + def test_destroy(self, abcd): """ Test destroying database index. """ - self.assertTrue(self.abcd.client.indices.exists("test_index")) - self.abcd.destroy() - self.assertFalse(self.abcd.client.indices.exists("test_index")) + assert abcd.client.indices.exists("test_index") is True + abcd.destroy() + assert abcd.client.indices.exists("test_index") is False - def test_create(self): + def test_create(self, abcd): """ Test creating database index. """ - self.abcd.destroy() - self.abcd.create() - self.assertTrue(self.abcd.client.indices.exists("test_index")) - self.assertFalse(self.abcd.client.indices.exists("fake_index")) + abcd.destroy() + abcd.create() + assert abcd.client.indices.exists("test_index") is True + abcd.client.indices.exists("fake_index") is False - def test_push(self): + def test_push(self, abcd): """ Test pushing atoms objects to database individually. 
""" - self.abcd.destroy() - self.abcd.create() + abcd.destroy() + abcd.create() xyz_1 = StringIO( """2 Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" @@ -80,7 +65,7 @@ def test_push(self): atoms_1 = read(xyz_1, format="extxyz") assert isinstance(atoms_1, Atoms) atoms_1.set_cell([1, 1, 1]) - self.abcd.push(atoms_1) + abcd.push(atoms_1) xyz_2 = StringIO( """2 @@ -96,17 +81,17 @@ def test_push(self): result = AtomsModel( None, None, - self.abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], ).to_ase() - self.assertEqual(atoms_1, result) - self.assertNotEqual(atoms_2, result) + assert atoms_1 == result + assert atoms_2 != result - def test_bulk(self): + def test_bulk(self, abcd): """ Test pushing atoms object to database together. """ - self.abcd.destroy() - self.abcd.create() + abcd.destroy() + abcd.create() xyz_1 = StringIO( """2 Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" @@ -131,28 +116,28 @@ def test_bulk(self): atoms_list = [] atoms_list.append(atoms_1) atoms_list.append(atoms_2) - self.abcd.push(atoms_list) - self.assertEqual(self.abcd.count(), 2) + abcd.push(atoms_list) + assert abcd.count() == 2 result_1 = AtomsModel( None, None, - self.abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], ).to_ase() result_2 = AtomsModel( None, None, - self.abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], + abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], ).to_ase() - self.assertEqual(atoms_1, result_1) - self.assertEqual(atoms_2, result_2) + assert atoms_1 == result_1 + assert atoms_2 == result_2 - def test_count(self): + def test_count(self, abcd): """ Test counting the number of documents in the database. 
""" - self.abcd.destroy() - self.abcd.create() + abcd.destroy() + abcd.create() xyz = StringIO( """2 Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" @@ -164,10 +149,6 @@ def test_count(self): atoms = read(xyz, format="extxyz") assert isinstance(atoms, Atoms) atoms.set_cell([1, 1, 1]) - self.abcd.push(atoms) - self.abcd.push(atoms) - self.assertEqual(self.abcd.count(), 2) - - -if __name__ == "__main__": - unittest.main(verbosity=1, exit=False) + abcd.push(atoms) + abcd.push(atoms) + assert abcd.count() == 2 From de9593c66affcc3de48b22ad6e44ea079d9bc8b2 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 16:33:03 +0100 Subject: [PATCH 088/112] Update CLI tests for pytest --- tests/{cli.py => test_cli.py} | 46 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) rename tests/{cli.py => test_cli.py} (79%) diff --git a/tests/cli.py b/tests/test_cli.py similarity index 79% rename from tests/cli.py rename to tests/test_cli.py index 42940cb3..0cfa8e70 100644 --- a/tests/cli.py +++ b/tests/test_cli.py @@ -3,43 +3,43 @@ from pathlib import Path import subprocess from time import sleep -import unittest + +import pytest + DATA_PATH = Path(__file__).parent / "data" +NOT_GTHUB_ACTIONS = True +if os.getenv("GITHUB_ACTIONS") == "true": + NOT_GTHUB_ACTIONS = False -class CLI(unittest.TestCase): - """ - Testing OpenSearch database CLI integration. - """ +@pytest.mark.skipif(NOT_GTHUB_ACTIONS, reason="Not running via GitHub Actions") +class TestCli: + """Testing OpenSearch database CLI integration.""" - @classmethod - def setUpClass(cls): - """ - Set up OpenSearch database connection and login with CLI. 
- """ - if os.getenv("GITHUB_ACTIONS") != "true": - raise unittest.SkipTest("Only runs via GitHub Actions") - cls.security_enabled = os.getenv("security_enabled") == "true" - cls.port = int(os.environ["port"]) - cls.host = "localhost" + @pytest.fixture(autouse=True) + def abcd(self): + """Set up OpenSearch database connection and login with CLI.""" + security_enabled = os.getenv("security_enabled") == "true" + port = int(os.environ["port"]) + host = "localhost" if os.environ["opensearch-version"] == "latest": - cls.credential = "admin:myStrongPassword123!" + credential = "admin:myStrongPassword123!" else: - cls.credential = "admin:admin" + credential = "admin:admin" logging.basicConfig(level=logging.INFO) - url = f"opensearch://{cls.credential}@{cls.host}:{cls.port}" - if not cls.security_enabled: + url = f"opensearch://{credential}@{host}:{port}" + if not security_enabled: url += " --disable_ssl" try: subprocess.run(f"abcd login {url}", shell=True, check=True) except subprocess.CalledProcessError: sleep(10) subprocess.run(f"abcd login {url}", shell=True, check=True) - - def test_summary(self): + + def test_summary(self, abcd): """ Test summary output of uploaded data file. """ @@ -56,7 +56,7 @@ def test_summary(self): assert "Total number of configurations" in summary.stdout subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) - def test_query(self): + def test_query(self, abcd): """ Test lucene-style query. """ @@ -89,7 +89,7 @@ def test_query(self): assert "3" in summary.stdout and "2" not in summary.stdout subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) - def test_range_query(self): + def test_range_query(self, abcd): """ Test lucene-style ranged query. 
""" From eaf290e6c6adfcaeb99ca70eb4056a84c1204c9b Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 16:49:08 +0100 Subject: [PATCH 089/112] Update property tests for pytest --- tests/{properties.py => test_properties.py} | 78 ++++++++++----------- 1 file changed, 36 insertions(+), 42 deletions(-) rename tests/{properties.py => test_properties.py} (65%) diff --git a/tests/properties.py b/tests/test_properties.py similarity index 65% rename from tests/properties.py rename to tests/test_properties.py index 76177cf9..1578fc64 100644 --- a/tests/properties.py +++ b/tests/test_properties.py @@ -1,54 +1,52 @@ import os -import unittest from pandas import DataFrame +import pytest from abcd.backends.atoms_properties import Properties -class PropertiesTests(unittest.TestCase): +class TestProperties: """Testing properties data reader""" - @classmethod - def setUpClass(cls): - """ - Load example data file. - """ + @pytest.fixture(autouse=True) + def property(self): + """Load example data file.""" class_path = os.path.normpath(os.path.abspath(__file__)) data_file = os.path.dirname(class_path) + "/data/examples.csv" - cls.property = Properties(data_file) + return Properties(data_file) - def test_dataframe(self): + def test_dataframe(self, property): """ Test data correctly stored in pandas DataFrame. """ - assert isinstance(self.property.df, DataFrame) - assert len(self.property.df) == 3 + assert isinstance(property.df, DataFrame) + assert len(property.df) == 3 - def test_specify_units(self): + def test_specify_units(self, property): """ Test units can be specified manually, if they match existing fields. 
""" input_units_1 = {"Integers": "items", "Floating": "seconds"} properties_1 = Properties( - data_file=self.property.data_file, + data_file=property.data_file, units=input_units_1, ) - self.assertEqual(properties_1.units, input_units_1) + assert properties_1.units == input_units_1 input_units_2 = {"Fake": "m"} - with self.assertRaises(ValueError): + with pytest.raises(ValueError): properties_1 = Properties( - data_file=self.property.data_file, + data_file=property.data_file, units=input_units_2, ) - def test_infer_units(self): + def test_infer_units(self, property): """ Test units can be inferred from field names. """ properties = Properties( - data_file=self.property.data_file, + data_file=property.data_file, infer_units=True, ) expected_units = {"Comma units": "m", "Bracket units": "s"} @@ -61,17 +59,17 @@ def test_infer_units(self): "Comma units", "Bracket units", ] - self.assertEqual(properties.units, expected_units) - self.assertEqual(list(properties.df.columns.values), expected_fields) + assert properties.units == expected_units + assert list(properties.df.columns.values) == expected_fields - def test_struct_file(self): + def test_struct_file(self, property): """ Test structure file names can be inferred from a field. 
""" struct_file_template = "test_{struct_name}_file.txt" struct_name_label = "Text" properties_1 = Properties( - data_file=self.property.data_file, + data_file=property.data_file, store_struct_file=True, struct_file_template=struct_file_template, struct_name_label=struct_name_label, @@ -81,35 +79,35 @@ def test_struct_file(self): "test_test_file.txt", "test_data_file.txt", ] - self.assertIsInstance(properties_1.struct_files, list) + assert isinstance(properties_1.struct_files, list) for i, file in enumerate(expected_struct_files): - self.assertEqual(properties_1.struct_files[i], file) + assert properties_1.struct_files[i] == file invalid_template = "invalid_template" - with self.assertRaises(ValueError): + with pytest.raises(ValueError): Properties( - data_file=self.property.data_file, + data_file=property.data_file, store_struct_file=True, struct_file_template=invalid_template, struct_name_label=struct_name_label, ) invalid_label = "label" - with self.assertRaises(ValueError): + with pytest.raises(ValueError): Properties( - data_file=self.property.data_file, + data_file=property.data_file, store_struct_file=True, struct_file_template=struct_file_template, struct_name_label=invalid_label, ) - def test_to_list(self): + def test_to_list(self, property): """ Test dataframe can be converted into a list of properties. 
""" - self.assertEqual(len(self.property.to_list()), 3) - self.assertIsInstance(self.property.to_list(), list) - self.assertIsInstance(self.property.to_list()[0], dict) + assert len(property.to_list()) == 3 + assert isinstance(property.to_list(), list) + assert isinstance(property.to_list()[0], dict) expected_property = { "Text": "Some", "Integers": 1, @@ -119,9 +117,9 @@ def test_to_list(self): "Comma units, m": 0, "Bracket units (s)": 0, } - self.assertEqual(self.property.to_list()[0], expected_property) + assert property.to_list()[0] == expected_property - def test_missing_data(self): + def test_missing_data(self, property): """ Test missing data is not included in properties. """ @@ -133,14 +131,14 @@ def test_missing_data(self): "Comma units, m": 1, "Bracket units (s)": 1, } - self.assertEqual(self.property.to_list()[1], expected_property) + assert property.to_list()[1] == expected_property - def test_to_list_units(self): + def test_to_list_units(self, property): """ Test units are included in properties when converting to a list. 
""" properties_1 = Properties( - data_file=self.property.data_file, + data_file=property.data_file, infer_units=True, ) expected_units = {"Comma units": "m", "Bracket units": "s"} @@ -154,8 +152,4 @@ def test_to_list_units(self): "Bracket units": 0, "units": expected_units, } - self.assertEqual(properties_1.to_list()[0], expected_property) - - -if __name__ == "__main__": - unittest.main() + assert properties_1.to_list()[0] == expected_property From ad06e2e66b7cc2d1699a54d3dfee2e07b455efc3 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:01:15 +0100 Subject: [PATCH 090/112] Update opensearch tests for pytest --- tests/{opensearch.py => test_opensearch.py} | 466 +++++++++----------- 1 file changed, 202 insertions(+), 264 deletions(-) rename tests/{opensearch.py => test_opensearch.py} (54%) diff --git a/tests/opensearch.py b/tests/test_opensearch.py similarity index 54% rename from tests/opensearch.py rename to tests/test_opensearch.py index 23a56dd4..c858b078 100644 --- a/tests/opensearch.py +++ b/tests/test_opensearch.py @@ -2,67 +2,56 @@ import logging import os from time import sleep -import unittest from ase.atoms import Atoms from ase.io import read from opensearchpy.exceptions import ConnectionError +import pytest from abcd import ABCD from abcd.backends.atoms_opensearch import AtomsModel, OpenSearchDatabase +NOT_GTHUB_ACTIONS = True +if os.getenv("GITHUB_ACTIONS") == "true": + NOT_GTHUB_ACTIONS = False -class OpenSearch(unittest.TestCase): - """ - Testing live OpenSearch database functions. - """ - - @classmethod - def setUpClass(cls): - """ - Set up OpenSearch database connection. 
- """ - if os.getenv("GITHUB_ACTIONS") != "true": - raise unittest.SkipTest("Only runs via GitHub Actions") - cls.security_enabled = os.getenv("security_enabled") == "true" - cls.port = int(os.environ["port"]) - cls.host = "localhost" +@pytest.mark.skipif(NOT_GTHUB_ACTIONS, reason="Not running via GitHub Actions") +class TestOpenSearch: + """Testing live OpenSearch database functions.""" + + @pytest.fixture(autouse=True) + def abcd(self): + """Set up OpenSearch database connection.""" + security_enabled = os.getenv("security_enabled") == "true" + self.port = int(os.environ["port"]) + self.host = "localhost" if os.environ["opensearch-version"] == "latest": - cls.credential = "admin:myStrongPassword123!" + credential = "admin:myStrongPassword123!" else: - cls.credential = "admin:admin" + credential = "admin:admin" logging.basicConfig(level=logging.INFO) - url = f"opensearch://{cls.credential}@{cls.host}:{cls.port}" + url = f"opensearch://{credential}@{self.host}:{self.port}" try: - abcd = ABCD.from_url( + abcd_opensearch = ABCD.from_url( url, index_name="test_index", - use_ssl=cls.security_enabled, + use_ssl=security_enabled, ) except (ConnectionError, ConnectionResetError): sleep(10) - abcd = ABCD.from_url( + abcd_opensearch = ABCD.from_url( url, index_name="test_index", - use_ssl=cls.security_enabled, + use_ssl=security_enabled, ) - assert isinstance(abcd, OpenSearchDatabase) - cls.abcd = abcd - - @classmethod - def tearDownClass(cls): - """ - Delete index from OpenSearch database. - """ - cls.abcd.destroy() + assert isinstance(abcd_opensearch, OpenSearchDatabase) + return abcd_opensearch - def push_data(self): - """ - Helper function to upload an example xyz file to the database. 
- """ + def push_data(self, abcd): + """Helper function to upload an example xyz file to the database.""" xyz = StringIO( """2 Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" @@ -74,17 +63,15 @@ def push_data(self): atoms = read(xyz, format="extxyz") assert isinstance(atoms, Atoms) atoms.set_cell([1, 1, 1]) - self.abcd.push(atoms) - self.abcd.refresh() - - def test_info(self): - """ - Test printing database info. - """ - self.abcd.destroy() - self.abcd.create() - self.abcd.refresh() - self.abcd.print_info() + abcd.push(atoms) + abcd.refresh() + + def test_info(self, abcd): + """Test printing database info.""" + abcd.destroy() + abcd.create() + abcd.refresh() + abcd.print_info() info = { "host": self.host, @@ -94,36 +81,30 @@ def test_info(self): "number of confs": 0, "type": "opensearch", } - self.assertEqual(self.abcd.info(), info) - - def test_destroy(self): - """ - Test destroying database index. - """ - self.abcd.destroy() - self.abcd.create() - self.abcd.refresh() - self.assertTrue(self.abcd.client.indices.exists("test_index")) - - self.abcd.destroy() - self.assertFalse(self.abcd.client.indices.exists("test_index")) - - def test_create(self): - """ - Test creating database index. - """ - self.abcd.destroy() - self.abcd.create() - self.abcd.refresh() - self.assertTrue(self.abcd.client.indices.exists("test_index")) - self.assertFalse(self.abcd.client.indices.exists("fake_index")) - - def test_push(self): - """ - Test pushing atoms objects to database individually. 
- """ - self.abcd.destroy() - self.abcd.create() + assert abcd.info() == info + + def test_destroy(self, abcd): + """Test destroying database index.""" + abcd.destroy() + abcd.create() + abcd.refresh() + assert abcd.client.indices.exists("test_index") is True + + abcd.destroy() + assert abcd.client.indices.exists("test_index") is False + + def test_create(self, abcd): + """Test creating database index.""" + abcd.destroy() + abcd.create() + abcd.refresh() + assert abcd.client.indices.exists("test_index") is True + assert abcd.client.indices.exists("fake_index") is False + + def test_push(self, abcd): + """Test pushing atoms objects to database individually.""" + abcd.destroy() + abcd.create() xyz_1 = StringIO( """2 Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" @@ -134,7 +115,7 @@ def test_push(self): atoms_1 = read(xyz_1, format="extxyz") assert isinstance(atoms_1, Atoms) atoms_1.set_cell([1, 1, 1]) - self.abcd.push(atoms_1) + abcd.push(atoms_1) xyz_2 = StringIO( """2 @@ -147,34 +128,30 @@ def test_push(self): assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) - self.abcd.refresh() + abcd.refresh() result = AtomsModel( None, None, - self.abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], ).to_ase() - self.assertEqual(atoms_1, result) - self.assertNotEqual(atoms_2, result) - - def test_delete(self): - """ - Test deleting all documents from database. - """ - self.push_data() - self.push_data() - - self.assertEqual(self.abcd.count(), 2) - self.abcd.delete() - self.assertTrue(self.abcd.client.indices.exists("test_index")) - self.abcd.refresh() - self.assertEqual(self.abcd.count(), 0) - - def test_bulk(self): - """ - Test pushing atoms object to database together. 
- """ - self.abcd.destroy() - self.abcd.create() + assert atoms_1 == result + assert atoms_2 != result + + def test_delete(self, abcd): + """Test deleting all documents from database.""" + self.push_data(abcd) + self.push_data(abcd) + + assert abcd.count() == 2 + abcd.delete() + assert abcd.client.indices.exists("test_index") is True + abcd.refresh() + assert abcd.count() == 0 + + def test_bulk(self, abcd): + """Test pushing atoms object to database together.""" + abcd.destroy() + abcd.create() xyz_1 = StringIO( """2 Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" @@ -199,39 +176,35 @@ def test_bulk(self): atoms_list = [] atoms_list.append(atoms_1) atoms_list.append(atoms_2) - self.abcd.push(atoms_list) + abcd.push(atoms_list) - self.abcd.refresh() - self.assertEqual(self.abcd.count(), 2) + abcd.refresh() + assert abcd.count() == 2 result_1 = AtomsModel( None, None, - self.abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], ).to_ase() result_2 = AtomsModel( None, None, - self.abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], + abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], ).to_ase() - self.assertEqual(atoms_1, result_1) - self.assertEqual(atoms_2, result_2) - - def test_count(self): - """ - Test counting the number of documents in the database. - """ - self.abcd.destroy() - self.abcd.create() - self.push_data() - self.push_data() - self.assertEqual(self.abcd.count(), 2) - - def test_property(self): - """ - Test getting values of a property from the database. 
- """ - self.abcd.destroy() - self.abcd.create() + assert atoms_1 == result_1 + assert atoms_2 == result_2 + + def test_count(self, abcd): + """Test counting the number of documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + self.push_data(abcd) + assert abcd.count() == 2 + + def test_property(self, abcd): + """Test getting values of a property from the database.""" + abcd.destroy() + abcd.create() xyz_1 = StringIO( """2 @@ -244,7 +217,7 @@ def test_property(self): atoms_1 = read(xyz_1, format="extxyz") assert isinstance(atoms_1, Atoms) atoms_1.set_cell([1, 1, 1]) - self.abcd.push(atoms_1, store_calc=False) + abcd.push(atoms_1, store_calc=False) xyz_2 = StringIO( """2 @@ -257,26 +230,24 @@ def test_property(self): atoms_2 = read(xyz_2, format="extxyz") assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) - self.abcd.push(atoms_2, store_calc=False) + abcd.push(atoms_2, store_calc=False) - self.abcd.refresh() - prop = self.abcd.property("prop_1") + abcd.refresh() + prop = abcd.property("prop_1") expected_prop = ["test_1"] - self.assertEqual(prop, expected_prop) + assert prop == expected_prop - prop = self.abcd.property("energy") + prop = abcd.property("energy") expected_prop = [-5.0, -10.0] - self.assertEqual(prop[0], expected_prop[0]) - self.assertEqual(prop[1], expected_prop[1]) - - def test_properties(self): - """ - Test getting all properties from the database. 
- """ - self.abcd.destroy() - self.abcd.create() - self.push_data() - props = self.abcd.properties() + assert prop[0] == expected_prop[0] + assert prop[1] == expected_prop[1] + + def test_properties(self, abcd): + """Test getting all properties from the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + props = abcd.properties() expected_props = { "info": ["_vtk_test", "cell", "formula", "n_atoms", "pbc", "s", "volume"], "derived": [ @@ -290,14 +261,12 @@ def test_properties(self): ], "arrays": ["numbers", "positions"], } - self.assertEqual(props, expected_props) + assert props == expected_props - def test_count_property(self): - """ - Test counting values of specified properties from the database. - """ - self.abcd.destroy() - self.abcd.create() + def test_count_property(self, abcd): + """Test counting values of specified properties from the database.""" + abcd.destroy() + abcd.create() xyz_1 = StringIO( """2 @@ -310,7 +279,7 @@ def test_count_property(self): atoms_1 = read(xyz_1, format="extxyz") assert isinstance(atoms_1, Atoms) atoms_1.set_cell([1, 1, 1]) - self.abcd.push(atoms_1) + abcd.push(atoms_1) xyz_2 = StringIO( """1 @@ -322,19 +291,17 @@ def test_count_property(self): atoms_2 = read(xyz_2, format="extxyz") assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) - self.abcd.push(atoms_2) + abcd.push(atoms_2) - self.abcd.refresh() - self.assertEqual(self.abcd.count_property("prop_1"), {1: 1}) - self.assertEqual(self.abcd.count_property("n_atoms"), {1: 1, 2: 1}) - self.assertEqual(self.abcd.count_property("volume"), {1.0: 2}) + abcd.refresh() + assert abcd.count_property("prop_1") == {1: 1} + assert abcd.count_property("n_atoms") == {1: 1, 2: 1} + assert abcd.count_property("volume") == {1.0: 2} - def test_count_properties(self): - """ - Test counting appearences of each property in documents in the database. 
- """ - self.abcd.destroy() - self.abcd.create() + def test_count_properties(self, abcd): + """Test counting appearences of each property in documents in the database.""" + abcd.destroy() + abcd.create() xyz_1 = StringIO( """2 @@ -347,7 +314,7 @@ def test_count_properties(self): atoms_1 = read(xyz_1, format="extxyz") assert isinstance(atoms_1, Atoms) atoms_1.set_cell([1, 1, 1]) - self.abcd.push(atoms_1) + abcd.push(atoms_1) xyz_2 = StringIO( """2 @@ -360,10 +327,10 @@ def test_count_properties(self): atoms_2 = read(xyz_2, format="extxyz") assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) - self.abcd.push(atoms_2) + abcd.push(atoms_2) - self.abcd.refresh() - props = self.abcd.count_properties() + abcd.refresh() + props = abcd.count_properties() expected_counts = { "prop_1": {"count": 1, "category": "info", "dtype": "scalar(str)"}, "prop_2": {"count": 1, "category": "info", "dtype": "scalar(str)"}, @@ -391,74 +358,58 @@ def test_count_properties(self): "volume": {"count": 2, "category": "derived", "dtype": "scalar(float)"}, } - self.assertEqual(props, expected_counts) - - def test_add_property(self): - """ - Test adding a property to documents in the database. - """ - self.abcd.destroy() - self.abcd.create() - self.push_data() - self.abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) - - self.abcd.refresh() - data = self.abcd.client.search(index="test_index") - self.assertEqual( - data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"], "TEST_VALUE" - ) - self.assertIn( - "TEST_PROPERTY", data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] - ) - - def test_rename_property(self): - """ - Test renaming a property for documents in the database. 
- """ - self.abcd.destroy() - self.abcd.create() - self.push_data() - self.abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) - self.abcd.refresh() - self.abcd.rename_property("TEST_PROPERTY", "NEW_PROPERTY") - self.abcd.refresh() - - data = self.abcd.client.search(index="test_index") - self.assertEqual( - data["hits"]["hits"][0]["_source"]["NEW_PROPERTY"], "TEST_VALUE" - ) - - def test_delete_property(self): - """ - Test deleting a property from documents in the database. - """ - self.abcd.destroy() - self.abcd.create() - self.push_data() - - self.abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) - self.abcd.refresh() - data = self.abcd.client.search(index="test_index") - self.assertEqual( - data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"], "TEST_VALUE" - ) - - self.abcd.delete_property("TEST_PROPERTY") - self.abcd.refresh() - data = self.abcd.client.search(index="test_index") + assert props == expected_counts + + def test_add_property(self, abcd): + """Test adding a property to documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) + + abcd.refresh() + data = abcd.client.search(index="test_index") + assert data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"] == "TEST_VALUE" + assert "TEST_PROPERTY" in data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] + + def test_rename_property(self, abcd): + """Test renaming a property for documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) + abcd.refresh() + abcd.rename_property("TEST_PROPERTY", "NEW_PROPERTY") + abcd.refresh() + + data = abcd.client.search(index="test_index") + assert data["hits"]["hits"][0]["_source"]["NEW_PROPERTY"] == "TEST_VALUE" + + def test_delete_property(self, abcd): + """Test deleting a property from documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + + 
abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) + abcd.refresh() + data = abcd.client.search(index="test_index") + assert data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"] == "TEST_VALUE" + + abcd.delete_property("TEST_PROPERTY") + abcd.refresh() + data = abcd.client.search(index="test_index") with self.assertRaises(KeyError): data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"] self.assertNotIn( "TEST_PROPERTY", data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] ) - def test_get_items(self): - """ - Test getting a dictionary of values from documents in the database. - """ - self.abcd.destroy() - self.abcd.create() - self.push_data() + def test_get_items(self, abcd): + """Test getting a dictionary of values from documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) expected_items = { "_id": None, @@ -501,8 +452,8 @@ def test_get_items(self): "username": None, } - self.abcd.refresh() - items = list(self.abcd.get_items())[0] + abcd.refresh() + items = list(abcd.get_items())[0] for key in expected_items: if key not in [ @@ -516,33 +467,24 @@ def test_get_items(self): if isinstance(expected_items[key], dict): for dict_key in expected_items[key]: if isinstance(expected_items[key][dict_key], list): - self.assertEqual( - set(expected_items[key][dict_key]), - set(items[key][dict_key]), - ) + assert set(expected_items[key][dict_key]) == set(items[key][dict_key]) else: - self.assertEqual( - expected_items[key][dict_key], items[key][dict_key] - ) + assert expected_items[key][dict_key] == items[key][dict_key] else: - self.assertEqual(expected_items[key], items[key]) - - def test_get_atoms(self): - """ - Test getting values from documents in the database as Atoms objects. 
- """ - self.abcd.destroy() - self.abcd.create() - self.push_data() + assert expected_items[key] == items[key] + + def test_get_atoms(self, abcd): + """Test getting values from documents in the database as Atoms objects.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) expected_atoms = Atoms(symbols="Si2", pbc=False, cell=[1.0, 1.0, 1.0]) - self.assertEqual(expected_atoms, list(self.abcd.get_atoms())[0]) + assert expected_atoms == list(abcd.get_atoms())[0] - def test_query(self): - """ - Test querying documents in the database. - """ - self.abcd.destroy() - self.abcd.create() + def test_query(self, abcd): + """Test querying documents in the database.""" + abcd.destroy() + abcd.create() xyz_1 = StringIO( """2 @@ -555,7 +497,7 @@ def test_query(self): atoms_1 = read(xyz_1, format="extxyz") assert isinstance(atoms_1, Atoms) atoms_1.set_cell([1, 1, 1]) - self.abcd.push(atoms_1) + abcd.push(atoms_1) xyz_2 = StringIO( """2 @@ -568,18 +510,14 @@ def test_query(self): atoms_2 = read(xyz_2, format="extxyz") assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) - self.abcd.push(atoms_2) - self.abcd.refresh() + abcd.push(atoms_2) + abcd.refresh() query_dict = {"match": {"n_atoms": 2}} query_all = "volume: [0 TO 10]" query_1 = "prop_1: *" query_2 = "prop_2: *" - self.assertEqual(self.abcd.count(query_dict), 2) - self.assertEqual(self.abcd.count(query_all), 2) - self.assertEqual(self.abcd.count(query_1), 1) - self.assertEqual(self.abcd.count(query_2), 1) - - -if __name__ == "__main__": - unittest.main(verbosity=1, exit=False) + assert abcd.count(query_dict) == 2 + assert abcd.count(query_all) == 2 + assert abcd.count(query_1) == 1 + assert abcd.count(query_2) == 1 From 45ff643b6e68c5641040619962af1b11d2730338 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:26:56 +0100 Subject: [PATCH 091/112] Fix opensearch mock tests --- tests/test_opensearch_mock.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/test_opensearch_mock.py b/tests/test_opensearch_mock.py index 539ce5e2..207ad5ad 100644 --- a/tests/test_opensearch_mock.py +++ b/tests/test_opensearch_mock.py @@ -28,7 +28,7 @@ def abcd(self): logging.basicConfig(level=logging.INFO) url = f"opensearch://admin:admin@{host}:{port}" - opensearch_abcd = ABCD.from_url(url, index_name="test_index", analyse_schema=False) + opensearch_abcd = ABCD.from_url(url, index_name="test_index", use_ssl=False) assert isinstance(opensearch_abcd, OpenSearchDatabase) return opensearch_abcd From 375daa16fd8c075e267f7c80f3ecf5e916a79f15 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:52:14 +0100 Subject: [PATCH 092/112] Fix opensearch test --- tests/test_opensearch.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index c858b078..52ff57fa 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -15,6 +15,7 @@ if os.getenv("GITHUB_ACTIONS") == "true": NOT_GTHUB_ACTIONS = False + @pytest.mark.skipif(NOT_GTHUB_ACTIONS, reason="Not running via GitHub Actions") class TestOpenSearch: """Testing live OpenSearch database functions.""" @@ -370,7 +371,10 @@ def test_add_property(self, abcd): abcd.refresh() data = abcd.client.search(index="test_index") assert data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"] == "TEST_VALUE" - assert "TEST_PROPERTY" in data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] + assert ( + "TEST_PROPERTY" + in data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] + ) def test_rename_property(self, abcd): """Test renaming a property for documents in the database.""" @@ -399,10 +403,11 @@ def test_delete_property(self, abcd): abcd.delete_property("TEST_PROPERTY") abcd.refresh() data = abcd.client.search(index="test_index") - with self.assertRaises(KeyError): + with pytest.raises(KeyError): 
data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"] - self.assertNotIn( - "TEST_PROPERTY", data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] + assert ( + "TEST_PROPERTY" + not in data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] ) def test_get_items(self, abcd): @@ -467,7 +472,9 @@ def test_get_items(self, abcd): if isinstance(expected_items[key], dict): for dict_key in expected_items[key]: if isinstance(expected_items[key][dict_key], list): - assert set(expected_items[key][dict_key]) == set(items[key][dict_key]) + assert set(expected_items[key][dict_key]) == set( + items[key][dict_key] + ) else: assert expected_items[key][dict_key] == items[key][dict_key] else: From efe16250a2f3b50fa1a04fa8ddb07d1a79f20863 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:52:30 +0100 Subject: [PATCH 093/112] Tidy code --- tests/test_cli.py | 3 ++- tests/test_parsers.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 0cfa8e70..3089affc 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -13,6 +13,7 @@ if os.getenv("GITHUB_ACTIONS") == "true": NOT_GTHUB_ACTIONS = False + @pytest.mark.skipif(NOT_GTHUB_ACTIONS, reason="Not running via GitHub Actions") class TestCli: """Testing OpenSearch database CLI integration.""" @@ -38,7 +39,7 @@ def abcd(self): except subprocess.CalledProcessError: sleep(10) subprocess.run(f"abcd login {url}", shell=True, check=True) - + def test_summary(self, abcd): """ Test summary output of uploaded data file. 
diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 07cab3f0..49e5e4a7 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -131,8 +131,8 @@ def test_composite(self, parser): @pytest.mark.parametrize( "string, expected", [ - ('colon_string:"astring"', {'colon_string': 'astring'}), - ('colon_string_spaces : "astring"', {'colon_string_spaces': 'astring'}), + ('colon_string:"astring"', {"colon_string": "astring"}), + ('colon_string_spaces : "astring"', {"colon_string_spaces": "astring"}), ], ) def test_colon_key_value_pairs(self, parser, string, expected): From 78270ecdf77dd3e749849b02b7db87a94f47ca28 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 17:53:29 +0100 Subject: [PATCH 094/112] Fix opensearch test --- tests/test_opensearch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 52ff57fa..1735bca9 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -140,8 +140,11 @@ def test_push(self, abcd): def test_delete(self, abcd): """Test deleting all documents from database.""" + abcd.destroy() + abcd.create() self.push_data(abcd) self.push_data(abcd) + abcd.refresh() assert abcd.count() == 2 abcd.delete() From 532ebffc1a9496edccc4d21264855cc9eb31256c Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Wed, 12 Jun 2024 18:02:10 +0100 Subject: [PATCH 095/112] Fix mock opensearch tests --- tests/test_opensearch_mock.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tests/test_opensearch_mock.py b/tests/test_opensearch_mock.py index 207ad5ad..94dd6711 100644 --- a/tests/test_opensearch_mock.py +++ b/tests/test_opensearch_mock.py @@ -24,11 +24,20 @@ def abcd(self): else: port = 9200 host = "localhost" + security_enabled = os.getenv("security_enabled") == "true" + if os.environ["opensearch-version"] == 
"latest": + credential = "admin:myStrongPassword123!" + else: + credential = "admin:admin" logging.basicConfig(level=logging.INFO) - url = f"opensearch://admin:admin@{host}:{port}" - opensearch_abcd = ABCD.from_url(url, index_name="test_index", use_ssl=False) + url = f"opensearch://{credential}@{host}:{port}" + opensearch_abcd = ABCD.from_url( + url, + index_name="test_index", + use_ssl=security_enabled, + ) assert isinstance(opensearch_abcd, OpenSearchDatabase) return opensearch_abcd @@ -78,10 +87,9 @@ def test_push(self, abcd): assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) + abcd.refresh() result = AtomsModel( - None, - None, - abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + dict=abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], ).to_ase() assert atoms_1 == result assert atoms_2 != result @@ -117,17 +125,14 @@ def test_bulk(self, abcd): atoms_list.append(atoms_1) atoms_list.append(atoms_2) abcd.push(atoms_list) + abcd.refresh() assert abcd.count() == 2 result_1 = AtomsModel( - None, - None, - abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + dict=abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], ).to_ase() result_2 = AtomsModel( - None, - None, - abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], + dict=abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], ).to_ase() assert atoms_1 == result_1 assert atoms_2 == result_2 @@ -151,4 +156,5 @@ def test_count(self, abcd): atoms.set_cell([1, 1, 1]) abcd.push(atoms) abcd.push(atoms) + abcd.refresh() assert abcd.count() == 2 From b1152ea25201a8932cf72f5eaf44da2e393ef255 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 13 Jun 2024 13:13:13 +0200 Subject: [PATCH 096/112] Fix histogram query --- abcd/backends/atoms_opensearch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/abcd/backends/atoms_opensearch.py 
b/abcd/backends/atoms_opensearch.py index e4ef72bf..9a795cee 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -966,7 +966,7 @@ def hist( query = self.parser(query) logger.info("parsed query: %s", query) - data = self.property(name, query) + data = self.property(name, query=query) return utils.histogram(name, data, **kwargs) def __repr__(self): From c82ec9dcad5d8df3770ecf4f86ae54da5dd8ac56 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 27 Mar 2025 13:48:21 +0000 Subject: [PATCH 097/112] Remove black and flake8 --- .flake8 | 2 +- pyproject.toml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.flake8 b/.flake8 index 94f58d25..c0f0c98e 100644 --- a/.flake8 +++ b/.flake8 @@ -5,7 +5,7 @@ max-line-length = 88 ignore = E203,W503 # ignore conflicts with black application-import-names = abcd,tests import-order-style = google -exclude = +exclude = abcd/backends/atoms_pymongo.py, abcd/frontends, abcd/model.py, diff --git a/pyproject.toml b/pyproject.toml index e37280c9..302e8575 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,8 +24,6 @@ python = "^3.9" tqdm = "^4.66" [tool.poetry.group.dev.dependencies] -black = "^22.3.0" -flake8 = "^3.7.9" mongomock = "^4.1.2" openmock = "^2.2" pytest = "^8.2.2" From 7dd21eb49df3d05c0c6ef4004d33d8c9be6fe585 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 27 Mar 2025 13:50:50 +0000 Subject: [PATCH 098/112] Apply ruff --- README.md | 2 +- abcd/backends/atoms_opensearch.py | 29 +++++++++--------------- abcd/backends/atoms_properties.py | 8 ++++--- abcd/frontends/commandline/commands.py | 2 +- abcd/frontends/commandline/decorators.py | 3 ++- tests/test_cli.py | 13 +++++------ tests/test_mongomock.py | 3 +-- 7 files changed, 27 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 938a3413..882f47fe 100644 --- a/README.md +++ b/README.md @@ -144,7 
+144,7 @@ To access it: docker pull stenczelt/projection-abcd:latest ``` -2. create a docker network, which enables the containers to communicate with each other and the outside world as well +2. create a docker network, which enables the containers to communicate with each other and the outside world as well ```sh docker network create --driver bridge abcd-network ``` diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 9a795cee..5acb1d0d 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -1,20 +1,20 @@ from __future__ import annotations -from collections.abc import Iterator +from collections.abc import Iterable, Iterator from datetime import datetime -from typing import Iterable, Optional, Union import logging from os import linesep from pathlib import Path +from typing import Optional, Union from ase import Atoms from ase.io import iread from opensearchpy import ( - OpenSearch, - helpers, AuthenticationException, ConnectionTimeout, + OpenSearch, RequestError, + helpers, ) from abcd.backends import utils @@ -24,7 +24,6 @@ from abcd.parsers import extras from abcd.queryset import AbstractQuerySet - logger = logging.getLogger(__name__) map_types = { @@ -768,18 +767,16 @@ def get_type_of_property(self, prop: str, category: str) -> str: if category == "arrays": if isinstance(data[0], list): - return "array({}, N x {})".format( - map_types[type(data[0][0])], len(data[0]) - ) - return "vector({}, N)".format(map_types[type(data[0])]) + return f"array({map_types[type(data[0][0])]}, N x {len(data[0])})" + return f"vector({map_types[type(data[0])]}, N)" if isinstance(data, list): if isinstance(data[0], list): if isinstance(data[0][0], list): return "list(list(...)" - return "array({})".format(map_types[type(data[0][0])]) - return "vector({})".format(map_types[type(data[0])]) - return "scalar({})".format(map_types[type(data)]) + return f"array({map_types[type(data[0][0])]})" + return 
f"vector({map_types[type(data[0])]})" + return f"scalar({map_types[type(data)]})" def count_properties(self, query: Optional[Union[dict, str]] = None) -> dict: """ @@ -984,11 +981,7 @@ def __repr__(self): else: host, port = None, None - return ( - f"{self.__class__.__name__}(" - f"url={host}:{port}, " - f"index={self.index_name}) " - ) + return f"{self.__class__.__name__}(url={host}:{port}, index={self.index_name}) " def _repr_html_(self): """ @@ -1008,7 +1001,7 @@ def print_info(self): [ "{:=^50}".format(" ABCD OpenSearch "), "{:>10}: {}".format("type", "opensearch"), - linesep.join("{:>10}: {}".format(k, v) for k, v in self.info().items()), + linesep.join(f"{k:>10}: {v}" for k, v in self.info().items()), ] ) diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py index 7372e797..9d766a90 100644 --- a/abcd/backends/atoms_properties.py +++ b/abcd/backends/atoms_properties.py @@ -1,9 +1,11 @@ from __future__ import annotations -import pandas as pd -import numpy as np -from typing import Union + from pathlib import Path +from typing import Union + import chardet +import numpy as np +import pandas as pd class Properties: diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index 87c02e9b..98683f90 100644 --- a/abcd/frontends/commandline/commands.py +++ b/abcd/frontends/commandline/commands.py @@ -3,8 +3,8 @@ import numpy as np -from abcd.frontends.commandline.decorators import check_remote, init_config, init_db from abcd.backends.atoms_opensearch import OpenSearchDatabase +from abcd.frontends.commandline.decorators import check_remote, init_config, init_db logger = logging.getLogger(__name__) diff --git a/abcd/frontends/commandline/decorators.py b/abcd/frontends/commandline/decorators.py index ce509004..a6f6eaa9 100644 --- a/abcd/frontends/commandline/decorators.py +++ b/abcd/frontends/commandline/decorators.py @@ -1,5 +1,6 @@ -import logging import functools +import logging + from abcd import ABCD 
from abcd.frontends.commandline.config import Config diff --git a/tests/test_cli.py b/tests/test_cli.py index 3089affc..262cbc1a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,7 +6,6 @@ import pytest - DATA_PATH = Path(__file__).parent / "data" NOT_GTHUB_ACTIONS = True @@ -49,13 +48,13 @@ def test_summary(self, abcd): subprocess.run( f"abcd upload {data_file} -i -e 'test_data'", shell=True, check=True ) - subprocess.run(f"abcd refresh", shell=True, check=True) + subprocess.run("abcd refresh", shell=True, check=True) summary = subprocess.run( "abcd summary", shell=True, check=True, capture_output=True, text=True ) assert "Total number of configurations" in summary.stdout - subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + subprocess.run("abcd delete -q 'test_data' -y", shell=True) def test_query(self, abcd): """ @@ -70,7 +69,7 @@ def test_query(self, abcd): subprocess.run( f"abcd upload {data_file_2} -i -e 'test_data'", shell=True, check=True ) - subprocess.run(f"abcd refresh", shell=True, check=True) + subprocess.run("abcd refresh", shell=True, check=True) summary = subprocess.run( "abcd show -p n_atoms -q 'n_atoms : 2'", @@ -88,7 +87,7 @@ def test_query(self, abcd): text=True, ) assert "3" in summary.stdout and "2" not in summary.stdout - subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + subprocess.run("abcd delete -q 'test_data' -y", shell=True) def test_range_query(self, abcd): """ @@ -103,7 +102,7 @@ def test_range_query(self, abcd): subprocess.run( f"abcd upload {data_file_2} -i -e 'test_data'", shell=True, check=True ) - subprocess.run(f"abcd refresh", shell=True, check=True) + subprocess.run("abcd refresh", shell=True, check=True) summary = subprocess.run( "abcd summary -p energy -q 'energy:[-100 TO -99]'", @@ -122,4 +121,4 @@ def test_range_query(self, abcd): text=True, ) assert "Total number of configurations: 2" in summary.stdout - subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + subprocess.run("abcd 
delete -q 'test_data' -y", shell=True) diff --git a/tests/test_mongomock.py b/tests/test_mongomock.py index 5444fe59..2e328e48 100644 --- a/tests/test_mongomock.py +++ b/tests/test_mongomock.py @@ -1,9 +1,8 @@ from io import StringIO import logging -import unittest -from ase.io import read from ase.atoms import Atoms +from ase.io import read import mongomock import pytest From f442d206d71ca92e6b2d28e837e44d34ac2b4cb2 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 27 Mar 2025 13:51:02 +0000 Subject: [PATCH 099/112] Apply ruff unsafe fixes --- abcd/backends/atoms_opensearch.py | 53 +++++++++++++++---------------- abcd/backends/atoms_properties.py | 9 +++--- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 5acb1d0d..8d5c4fd7 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -5,7 +5,6 @@ import logging from os import linesep from pathlib import Path -from typing import Optional, Union from ase import Atoms from ase.io import iread @@ -39,7 +38,7 @@ class OpenSearchQuery(AbstractQuerySet): """Class to parse and build queries for OpenSearch.""" - def __call__(self, query: Optional[Union[dict, str, list]]) -> Optional[dict]: + def __call__(self, query: dict | str | list | None) -> dict | None: """ Parses and builds queries for OpenSearch. @@ -115,9 +114,9 @@ class AtomsModel(AbstractModel): def __init__( self, - client: Optional[OpenSearch] = None, - index_name: Optional[str] = None, - dict: Optional[dict] = None, + client: OpenSearch | None = None, + index_name: str | None = None, + dict: dict | None = None, ): """ Initialises class. 
@@ -142,7 +141,7 @@ def from_atoms( client: OpenSearch, index_name: str, atoms: Atoms, - extra_info: Optional[dict] = None, + extra_info: dict | None = None, store_calc: bool = True, ) -> AtomsModel: """ @@ -173,7 +172,7 @@ def from_atoms( return obj @property - def _id(self) -> Optional[str]: + def _id(self) -> str | None: """ Get the OpenSearch document ID stored in data. @@ -316,7 +315,7 @@ def info(self): "type": "opensearch", } - def delete(self, query: Optional[Union[dict, str]] = None): + def delete(self, query: dict | str | None = None): """ Deletes documents from the database. @@ -375,8 +374,8 @@ def save_bulk(self, actions: Iterable[dict], **kwargs): def push( self, - atoms: Union[Atoms, Iterable], - extra_info: Optional[Union[dict, str, list]] = None, + atoms: Atoms | Iterable, + extra_info: dict | str | list | None = None, store_calc: bool = True, **kwargs, ): @@ -431,7 +430,7 @@ def push( def upload( self, file: Path, - extra_infos: Union[Iterable, dict] = (), + extra_infos: Iterable | dict = (), store_calc: bool = True, ): """ @@ -461,7 +460,7 @@ def upload( data = iread(str(file)) self.push(data, extra_info, store_calc=store_calc) - def get_items(self, query: Optional[Union[dict, str]] = None) -> Iterator[dict]: + def get_items(self, query: dict | str | None = None) -> Iterator[dict]: """ Get data as a dictionary from documents in the database. @@ -488,7 +487,7 @@ def get_items(self, query: Optional[Union[dict, str]] = None) -> Iterator[dict]: ): yield {"_id": hit["_id"], **hit["_source"]} - def get_atoms(self, query: Optional[Union[dict, str]] = None) -> Iterator[Atoms]: + def get_atoms(self, query: dict | str | None = None) -> Iterator[Atoms]: """ Get data as Atoms object from documents in the database. 
@@ -515,7 +514,7 @@ def get_atoms(self, query: Optional[Union[dict, str]] = None) -> Iterator[Atoms] ): yield AtomsModel(dict=hit["_source"]).to_ase() - def count(self, query: Optional[Union[dict, str]] = None, timeout=30.0) -> int: + def count(self, query: dict | str | None = None, timeout=30.0) -> int: """ Counts number of documents in the database. @@ -542,8 +541,8 @@ def count(self, query: Optional[Union[dict, str]] = None, timeout=30.0) -> int: def _get_props_from_source( self, - names: Union[str, list[str]], - query: Optional[Union[dict, str]] = None, + names: str | list[str], + query: dict | str | None = None, ) -> dict: """ Gets all values of specified properties using the original data from _source. @@ -578,10 +577,10 @@ def _get_props_from_source( def property( self, - names: Union[str, list[str]], + names: str | list[str], allow_flatten: bool = True, - query: Optional[Union[dict, str]] = None, - ) -> Union[dict, list]: + query: dict | str | None = None, + ) -> dict | list: """ Gets all values of specified properties for matching documents in the database. @@ -645,7 +644,7 @@ def property( return props[names[0]] return props - def count_property(self, name, query: Optional[Union[dict, str]] = None) -> dict: + def count_property(self, name, query: dict | str | None = None) -> dict: """ Counts values of a specified property for matching documents in the database. This method much faster than performing a Count on the list @@ -686,7 +685,7 @@ def count_property(self, name, query: Optional[Union[dict, str]] = None) -> dict return prop - def properties(self, query: Optional[Union[dict, str]] = None) -> dict: + def properties(self, query: dict | str | None = None) -> dict: """ Gets lists of all properties from matching documents, separated into info, derived, and array properties. 
@@ -778,7 +777,7 @@ def get_type_of_property(self, prop: str, category: str) -> str: return f"vector({map_types[type(data[0])]})" return f"scalar({map_types[type(data)]})" - def count_properties(self, query: Optional[Union[dict, str]] = None) -> dict: + def count_properties(self, query: dict | str | None = None) -> dict: """ Counts all properties from matching documents. @@ -838,7 +837,7 @@ def count_properties(self, query: Optional[Union[dict, str]] = None) -> dict: return properties - def add_property(self, data: dict, query: Optional[Union[dict, str]] = None): + def add_property(self, data: dict, query: dict | str | None = None): """ Adds properties to matching documents. @@ -871,7 +870,7 @@ def add_property(self, data: dict, query: Optional[Union[dict, str]] = None): ) def rename_property( - self, name: str, new_name: str, query: Optional[Union[dict, str]] = None + self, name: str, new_name: str, query: dict | str | None = None ): """ Renames property for all matching documents. @@ -908,7 +907,7 @@ def rename_property( self.client.update_by_query(index=self.index_name, body=body) - def delete_property(self, name: str, query: Optional[Union[dict, str]] = None): + def delete_property(self, name: str, query: dict | str | None = None): """ Deletes property from all matching documents. @@ -942,8 +941,8 @@ def delete_property(self, name: str, query: Optional[Union[dict, str]] = None): self.client.update_by_query(index=self.index_name, body=body) def hist( - self, name: str, query: Optional[Union[dict, str]] = None, **kwargs - ) -> Optional[dict]: + self, name: str, query: dict | str | None = None, **kwargs + ) -> dict | None: """ Calculate histogram statistics for a property from all matching documents. 
diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py index 9d766a90..49579ec4 100644 --- a/abcd/backends/atoms_properties.py +++ b/abcd/backends/atoms_properties.py @@ -1,7 +1,6 @@ from __future__ import annotations from pathlib import Path -from typing import Union import chardet import numpy as np @@ -36,11 +35,11 @@ class Properties: def __init__( self, - data_file: Union[str, Path], + data_file: str | Path, store_struct_file: bool = False, - struct_file_template: Union[str, None] = None, - struct_name_label: Union[str, None] = None, - units: Union[dict, None] = None, + struct_file_template: str | None = None, + struct_name_label: str | None = None, + units: dict | None = None, infer_units: bool = False, encoding: str = "utf-8", ): From f5208b93827f99249f8fd4f750067639f45badbf Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:12:41 +0000 Subject: [PATCH 100/112] Apply changes from ruff --- abcd/backends/atoms_opensearch.py | 4 +--- abcd/backends/atoms_properties.py | 8 ++++---- abcd/backends/utils.py | 8 ++++---- tests/test_opensearch_mock.py | 2 +- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 8d5c4fd7..1a2cea33 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -940,9 +940,7 @@ def delete_property(self, name: str, query: dict | str | None = None): self.client.update_by_query(index=self.index_name, body=body) - def hist( - self, name: str, query: dict | str | None = None, **kwargs - ) -> dict | None: + def hist(self, name: str, query: dict | str | None = None, **kwargs) -> dict | None: """ Calculate histogram statistics for a property from all matching documents. 
diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py index 49579ec4..c6fd5594 100644 --- a/abcd/backends/atoms_properties.py +++ b/abcd/backends/atoms_properties.py @@ -84,12 +84,12 @@ def __init__( self.encoding = encoding try: self.df = pd.read_csv(self.data_file, encoding=self.encoding) - except UnicodeDecodeError: + except UnicodeDecodeError as err: detected = chardet.detect(Path(self.data_file).read_bytes()) raise ValueError( f"File cannot be decoded using encoding: {self.encoding}." f" Detected encoding: {detected}." - ) + ) from err except pd.errors.ParserError: self.df = pd.read_excel(self.data_file, header=0) @@ -154,11 +154,11 @@ def set_struct_files(self): for i in range(len(self.df)): try: struct_name = self.df.iloc[i][self.struct_name_label] - except KeyError: + except KeyError as err: raise ValueError( f"{self.struct_name_label} is not a valid column in " "the data loaded." - ) + ) from err struct_file = self.get_struct_file(struct_name) self.struct_files.append(struct_file) diff --git a/abcd/backends/utils.py b/abcd/backends/utils.py index e55471eb..ca0157ca 100644 --- a/abcd/backends/utils.py +++ b/abcd/backends/utils.py @@ -19,18 +19,18 @@ def histogram(name, data, **kwargs): print("Mixed type error of the %s property!", name) return None - if ptype == float: + if isinstance(data[0], float): bins = kwargs.get("bins", 10) return _hist_float(name, data, bins) - if ptype == int: + if isinstance(data[0], int): bins = kwargs.get("bins", 10) return _hist_int(name, data, bins) - if ptype == str: + if isinstance(data[0], str): return _hist_str(name, data, **kwargs) - if ptype == datetime: + if isinstance(data[0], datetime): bins = kwargs.get("bins", 10) return _hist_date(name, data, bins) diff --git a/tests/test_opensearch_mock.py b/tests/test_opensearch_mock.py index 94dd6711..4a2b0a54 100644 --- a/tests/test_opensearch_mock.py +++ b/tests/test_opensearch_mock.py @@ -56,7 +56,7 @@ def test_create(self, abcd): abcd.destroy() 
abcd.create() assert abcd.client.indices.exists("test_index") is True - abcd.client.indices.exists("fake_index") is False + assert abcd.client.indices.exists("fake_index") is False def test_push(self, abcd): """ From 362effa279260ddd9985823733652ca76822e606 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:14:31 +0000 Subject: [PATCH 101/112] Simplify CI matrix --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 02abfa34..317fcf4c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,8 +9,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.9", "3.10", "3.11", "3.12" ] - opensearch: ['1.0.1', '2.0.1', 'latest'] + python-version: ["3.10", "3.11", "3.12"] + opensearch: ["latest"] security-enabled: ["true", "false"] steps: From d7a62cfea207738d338f8854db69f9bd87ae4fc1 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:45:23 +0000 Subject: [PATCH 102/112] Simplify connections --- abcd/__init__.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/abcd/__init__.py b/abcd/__init__.py index a4f7d456..ee9a448e 100644 --- a/abcd/__init__.py +++ b/abcd/__init__.py @@ -1,16 +1,9 @@ -from enum import Enum import logging from urllib import parse logger = logging.getLogger(__name__) -class ConnectionType(Enum): - mongodb = 1 - http = 2 - opensearch = 3 - - class ABCD: @classmethod def from_config(cls, config): @@ -26,8 +19,9 @@ def from_url(cls, url, **kwargs): db = r.path.split("/")[1] if r.path else None db = db if db else "abcd" + scheme = r.scheme - if ConnectionType[r.scheme] is ConnectionType.mongodb: + if scheme == "mongodb": conn_settings = { "host": r.hostname, "port": r.port, @@ -39,14 +33,14 @@ def from_url(cls, url, **kwargs): from 
abcd.backends.atoms_pymongo import MongoDatabase return MongoDatabase(db_name=db, **conn_settings, **kwargs) - if r.scheme == "mongodb+srv": + if scheme == "mongodb+srv": db = r.path.split("/")[1] if r.path else None db = db if db else "abcd" from abcd.backends.atoms_pymongo import MongoDatabase return MongoDatabase(db_name=db, host=r.geturl(), uri_mode=True, **kwargs) - if ConnectionType[r.scheme] is ConnectionType.opensearch: + if scheme == "opensearch": conn_settings = { "host": r.hostname, "port": r.port, @@ -57,9 +51,9 @@ def from_url(cls, url, **kwargs): from abcd.backends.atoms_opensearch import OpenSearchDatabase return OpenSearchDatabase(db=db, **conn_settings, **kwargs) - if r.scheme == "http" or r.scheme == "https": + if scheme in ("http", "https"): raise NotImplementedError("http not yet supported! soon...") - if r.scheme == "ssh": + if scheme == "ssh": raise NotImplementedError("ssh not yet supported! soon...") raise NotImplementedError( f"Unable to recognise the type of connection. 
(url: {url})" From 3cd7034bbba0663a84b5196f87c571463170f36e Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:59:59 +0000 Subject: [PATCH 103/112] Update OpenSearch --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 302e8575..2784ebe8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ matplotlib = "^3.9" notebook = "^7.2" numpy = "^1.26" openpyxl = "^3.1.2" -opensearch-py = "^2.4.0" +opensearch-py = "^2.8.0" pandas = "^2.2" pymongo = "^4.7.3" python = "^3.9" From 1c0b3489ec3cbfeda6bf07d1ffe901ebe199ab89 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:07:12 +0000 Subject: [PATCH 104/112] Fix calculator with no parameters --- abcd/backends/atoms_opensearch.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 1a2cea33..68b808c0 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -430,7 +430,7 @@ def push( def upload( self, file: Path, - extra_infos: Iterable | dict = (), + extra_infos: Iterable | dict | None = (), store_calc: bool = True, ): """ @@ -440,7 +440,7 @@ def upload( ---------- file: Path Path to file to be uploaded - extra_infos: Union[Iterable, dict] + extra_infos: Iterable | dict | None Extra information to store in the document with the atoms data. Default is `()`. 
store_calc: bool, optional @@ -452,8 +452,9 @@ def upload( file = Path(file) extra_info = {} - for info in extra_infos: - extra_info.update(extras.parser.parse(info)) + if extra_infos: + for info in extra_infos: + extra_info.update(extras.parser.parse(info)) extra_info["filename"] = str(file) @@ -762,7 +763,10 @@ def get_type_of_property(self, prop: str, category: str) -> str: body={"size": 1, "query": {"exists": {"field": prop}}}, ) - data = atoms["hits"]["hits"][0]["_source"][prop] + try: + data = atoms["hits"]["hits"][0]["_source"][prop] + except IndexError: + return "null" if category == "arrays": if isinstance(data[0], list): From ad3a152f1182afe28fddb7dec08a9bd99b606d31 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:45:49 +0000 Subject: [PATCH 105/112] Tidy tests --- .ci/opensearch/action.yml | 2 +- tests/test_cli.py | 39 ++++++++++++++--------------------- tests/test_opensearch.py | 29 ++++++++++++-------------- tests/test_opensearch_mock.py | 31 +++++++++++----------------- 4 files changed, 41 insertions(+), 60 deletions(-) diff --git a/.ci/opensearch/action.yml b/.ci/opensearch/action.yml index f917a612..63e0ec03 100644 --- a/.ci/opensearch/action.yml +++ b/.ci/opensearch/action.yml @@ -20,7 +20,7 @@ inputs: opensearch-initial-admin-password: description: 'The password for the user admin in your cluster' required: false - default: 'myStrongPassword123!' 
+ default: 'myStrongPassword_123' runs: using: 'docker' diff --git a/tests/test_cli.py b/tests/test_cli.py index 262cbc1a..9565d451 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,18 +20,15 @@ class TestCli: @pytest.fixture(autouse=True) def abcd(self): """Set up OpenSearch database connection and login with CLI.""" - security_enabled = os.getenv("security_enabled") == "true" - port = int(os.environ["port"]) - host = "localhost" - if os.environ["opensearch-version"] == "latest": - credential = "admin:myStrongPassword123!" - else: - credential = "admin:admin" - logging.basicConfig(level=logging.INFO) - url = f"opensearch://{credential}@{host}:{port}" - if not security_enabled: + self.port = int(os.environ.get("port", 9200)) + self.host = "localhost" + self.credential = "admin:myStrongPassword_123" + self.security_enabled = os.getenv("security_enabled") == "true" + + url = f"opensearch://{self.credential}@{self.host}:{self.port}" + if not self.security_enabled: url += " --disable_ssl" try: subprocess.run(f"abcd login {url}", shell=True, check=True) @@ -39,10 +36,8 @@ def abcd(self): sleep(10) subprocess.run(f"abcd login {url}", shell=True, check=True) - def test_summary(self, abcd): - """ - Test summary output of uploaded data file. - """ + def test_summary(self): + """Test summary output of uploaded data file.""" data_file = DATA_PATH / "example.xyz" subprocess.run( @@ -56,10 +51,8 @@ def test_summary(self, abcd): assert "Total number of configurations" in summary.stdout subprocess.run("abcd delete -q 'test_data' -y", shell=True) - def test_query(self, abcd): - """ - Test lucene-style query. 
- """ + def test_query(self): + """Test lucene-style query.""" data_file_1 = DATA_PATH / "example.xyz" data_file_2 = DATA_PATH / "example_2.xyz" @@ -89,18 +82,16 @@ def test_query(self, abcd): assert "3" in summary.stdout and "2" not in summary.stdout subprocess.run("abcd delete -q 'test_data' -y", shell=True) - def test_range_query(self, abcd): - """ - Test lucene-style ranged query. - """ + def test_range_query(self): + """Test lucene-style ranged query.""" data_file_1 = DATA_PATH / "example.xyz" data_file_2 = DATA_PATH / "example_2.xyz" subprocess.run( - f"abcd upload {data_file_1} -i -e 'test_data'", shell=True, check=True + f"abcd upload {data_file_1} -e 'test_data'", shell=True, check=True ) subprocess.run( - f"abcd upload {data_file_2} -i -e 'test_data'", shell=True, check=True + f"abcd upload {data_file_2} -e 'test_data'", shell=True, check=True ) subprocess.run("abcd refresh", shell=True, check=True) diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 1735bca9..2c350492 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -23,29 +23,26 @@ class TestOpenSearch: @pytest.fixture(autouse=True) def abcd(self): """Set up OpenSearch database connection.""" - security_enabled = os.getenv("security_enabled") == "true" - self.port = int(os.environ["port"]) - self.host = "localhost" - if os.environ["opensearch-version"] == "latest": - credential = "admin:myStrongPassword123!" 
- else: - credential = "admin:admin" - logging.basicConfig(level=logging.INFO) - url = f"opensearch://{credential}@{self.host}:{self.port}" + self.port = int(os.environ.get("port", 9200)) + self.host = "localhost" + self.credential = "admin:myStrongPassword_123" + self.security_enabled = os.getenv("security_enabled") == "true" + + url = f"opensearch://{self.credential}@{self.host}:{self.port}" try: abcd_opensearch = ABCD.from_url( url, index_name="test_index", - use_ssl=security_enabled, + use_ssl=self.security_enabled, ) except (ConnectionError, ConnectionResetError): sleep(10) abcd_opensearch = ABCD.from_url( url, index_name="test_index", - use_ssl=security_enabled, + use_ssl=self.security_enabled, ) assert isinstance(abcd_opensearch, OpenSearchDatabase) @@ -60,12 +57,12 @@ def push_data(self, abcd): Si 0.00000000 0.00000000 0.00000000 """ ) - atoms = read(xyz, format="extxyz") assert isinstance(atoms, Atoms) atoms.set_cell([1, 1, 1]) abcd.push(atoms) abcd.refresh() + return abcd def test_info(self, abcd): """Test printing database info.""" @@ -221,7 +218,7 @@ def test_property(self, abcd): atoms_1 = read(xyz_1, format="extxyz") assert isinstance(atoms_1, Atoms) atoms_1.set_cell([1, 1, 1]) - abcd.push(atoms_1, store_calc=False) + abcd.push(atoms_1, store_calc=True) xyz_2 = StringIO( """2 @@ -234,7 +231,7 @@ def test_property(self, abcd): atoms_2 = read(xyz_2, format="extxyz") assert isinstance(atoms_2, Atoms) atoms_2.set_cell([1, 1, 1]) - abcd.push(atoms_2, store_calc=False) + abcd.push(atoms_2, store_calc=True) abcd.refresh() prop = abcd.property("prop_1") @@ -253,7 +250,7 @@ def test_properties(self, abcd): self.push_data(abcd) props = abcd.properties() expected_props = { - "info": ["_vtk_test", "cell", "formula", "n_atoms", "pbc", "s", "volume"], + "info": ["_vtk_test", "cell", "formula", "n_atoms", "pbc", "s"], "derived": [ "elements", "hash", @@ -442,7 +439,6 @@ def test_get_items(self, abcd): "cell", "pbc", "formula", - "volume", ], "derived_keys": [ 
"elements", @@ -452,6 +448,7 @@ def test_get_items(self, abcd): "volume", "hash_structure", "hash", + "volume", ], "arrays_keys": ["numbers", "positions"], "results_keys": [], diff --git a/tests/test_opensearch_mock.py b/tests/test_opensearch_mock.py index 4a2b0a54..85ae3cba 100644 --- a/tests/test_opensearch_mock.py +++ b/tests/test_opensearch_mock.py @@ -18,25 +18,22 @@ class TestOpenSearchMock: @openmock def abcd(self): """Set up database connection.""" + logging.basicConfig(level=logging.INFO) - if "port" in os.environ: - port = int(os.environ["port"]) - else: - port = 9200 - host = "localhost" - security_enabled = os.getenv("security_enabled") == "true" - if os.environ["opensearch-version"] == "latest": - credential = "admin:myStrongPassword123!" + self.port = int(os.environ.get("port", 9200)) + self.host = "localhost" + self.credential = "admin:myStrongPassword_123" + if os.getenv("GITHUB_ACTIONS") == "true": + self.security_enabled = os.getenv("security_enabled") == "true" else: - credential = "admin:admin" - - logging.basicConfig(level=logging.INFO) + # Otherwise assume local OpenSearch is enabled + self.security_enabled = True - url = f"opensearch://{credential}@{host}:{port}" + url = f"opensearch://{self.credential}@{self.host}:{self.port}" opensearch_abcd = ABCD.from_url( url, index_name="test_index", - use_ssl=security_enabled, + use_ssl=self.security_enabled, ) assert isinstance(opensearch_abcd, OpenSearchDatabase) return opensearch_abcd @@ -95,9 +92,7 @@ def test_push(self, abcd): assert atoms_2 != result def test_bulk(self, abcd): - """ - Test pushing atoms object to database together. - """ + """Test pushing atoms object to database together.""" abcd.destroy() abcd.create() xyz_1 = StringIO( @@ -138,9 +133,7 @@ def test_bulk(self, abcd): assert atoms_2 == result_2 def test_count(self, abcd): - """ - Test counting the number of documents in the database. 
- """ + """Test counting the number of documents in the database.""" abcd.destroy() abcd.create() xyz = StringIO( From 2ae02bfa9cb0b422ee1c93263e29115161d3bbd7 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 28 Mar 2025 12:35:55 +0000 Subject: [PATCH 106/112] Tidy building script --- abcd/backends/atoms_opensearch.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index 68b808c0..a8e5d92a 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -891,8 +891,8 @@ def rename_property( query = self.parser(query) logger.info("rename: query=%s, old=%s, new=%s", query, name, new_name) - script_txt = "if (!ctx._source.containsKey(params.new_name)) { " - script_txt += ( + script_txt = ( + "if (!ctx._source.containsKey(params.new_name)) { " f"ctx._source.{new_name} = ctx._source.{name};" " ctx._source.remove(params.name);" " for (int i=0; i Date: Fri, 28 Mar 2025 12:44:08 +0000 Subject: [PATCH 107/112] Tidy code from review --- abcd/backends/atoms_opensearch.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index a8e5d92a..e4c80fab 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -560,9 +560,8 @@ def _get_props_from_source( dict Dictionary of lists of values for the specified properties. 
""" - props = {} hits = [ - dict(hit["_source"].items()) + hit["_source"].copy() for hit in helpers.scan( self.client, index=self.index_name, @@ -572,9 +571,7 @@ def _get_props_from_source( ) if "_source" in hit and all(name in hit["_source"] for name in names) ] - for name in names: - props[name] = [hit[name] for hit in hits] - return props + return {name: [hit[name] for hit in hits] for name in names} def property( self, @@ -663,14 +660,15 @@ def count_property(self, name, query: dict | str | None = None) -> dict: """ query = self.parser(query) logger.info("parsed query: %s", query) + prop_name = format(name) body = { "size": 0, "query": query, "aggs": { - format(name): { + prop_name: { "terms": { - "field": format(name), + "field": prop_name, "size": 10000, # Use composite for all results? }, }, @@ -680,7 +678,7 @@ def count_property(self, name, query: dict | str | None = None) -> dict: prop = {} for val in self.client.search(index=self.index_name, body=body)["aggregations"][ - format(name) + prop_name ]["buckets"]: prop[val["key"]] = val["doc_count"] From 2bb5eb31bcddab2fa8a99c0ea3be3ffd0683da09 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 28 Mar 2025 12:55:07 +0000 Subject: [PATCH 108/112] Remove unused flake8 file --- .flake8 | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 .flake8 diff --git a/.flake8 b/.flake8 deleted file mode 100644 index c0f0c98e..00000000 --- a/.flake8 +++ /dev/null @@ -1,16 +0,0 @@ -[flake8] -select = B,BLK,C,E,F,I,S,W -max-complexity = 30 -max-line-length = 88 -ignore = E203,W503 # ignore conflicts with black -application-import-names = abcd,tests -import-order-style = google -exclude = - abcd/backends/atoms_pymongo.py, - abcd/frontends, - abcd/model.py, - abcd/parsers/queries_new.py, - abcd/parsers/queries.py, - abcd/parsers/extras.py, - abcd/server, - tests/__init__.py From 90434d710a79b40c38348b844c75ec918761b1f0 Mon Sep 17 00:00:00 2001 From: 
ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 28 Mar 2025 12:58:09 +0000 Subject: [PATCH 109/112] Formatting --- abcd/server/app/db.py | 4 +--- tests/test_properties.py | 28 +++++++--------------------- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/abcd/server/app/db.py b/abcd/server/app/db.py index 7430c183..df0bb492 100644 --- a/abcd/server/app/db.py +++ b/abcd/server/app/db.py @@ -4,9 +4,7 @@ class Database(ABCD): - """ - Wrapper for the ABCD factory method for registering a the database for the Flask application. - """ + """Wrapper for factory method for registering database for a Flask application.""" def __init__(self): super().__init__() diff --git a/tests/test_properties.py b/tests/test_properties.py index 1578fc64..2c89f89d 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -17,16 +17,12 @@ def property(self): return Properties(data_file) def test_dataframe(self, property): - """ - Test data correctly stored in pandas DataFrame. - """ + """Test data correctly stored in pandas DataFrame.""" assert isinstance(property.df, DataFrame) assert len(property.df) == 3 def test_specify_units(self, property): - """ - Test units can be specified manually, if they match existing fields. - """ + """Test units can be specified manually, if they match existing fields.""" input_units_1 = {"Integers": "items", "Floating": "seconds"} properties_1 = Properties( data_file=property.data_file, @@ -42,9 +38,7 @@ def test_specify_units(self, property): ) def test_infer_units(self, property): - """ - Test units can be inferred from field names. - """ + """Test units can be inferred from field names.""" properties = Properties( data_file=property.data_file, infer_units=True, @@ -63,9 +57,7 @@ def test_infer_units(self, property): assert list(properties.df.columns.values) == expected_fields def test_struct_file(self, property): - """ - Test structure file names can be inferred from a field. 
- """ + """Test structure file names can be inferred from a field.""" struct_file_template = "test_{struct_name}_file.txt" struct_name_label = "Text" properties_1 = Properties( @@ -102,9 +94,7 @@ def test_struct_file(self, property): ) def test_to_list(self, property): - """ - Test dataframe can be converted into a list of properties. - """ + """Test dataframe can be converted into a list of properties.""" assert len(property.to_list()) == 3 assert isinstance(property.to_list(), list) assert isinstance(property.to_list()[0], dict) @@ -120,9 +110,7 @@ def test_to_list(self, property): assert property.to_list()[0] == expected_property def test_missing_data(self, property): - """ - Test missing data is not included in properties. - """ + """Test missing data is not included in properties.""" expected_property = { "Text": "test", "Integers": 2, @@ -134,9 +122,7 @@ def test_missing_data(self, property): assert property.to_list()[1] == expected_property def test_to_list_units(self, property): - """ - Test units are included in properties when converting to a list. - """ + """Test units are included in properties when converting to a list.""" properties_1 = Properties( data_file=property.data_file, infer_units=True, From 710b87e8ab12651fc15fdb0bedf628f9086ae91d Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 28 Mar 2025 13:02:16 +0000 Subject: [PATCH 110/112] Update ruff --- .pre-commit-config.yaml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4e44c9f5..20e716d4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.9.6 + rev: v0.11.2 hooks: # Run the linter. 
- id: ruff diff --git a/pyproject.toml b/pyproject.toml index 2784ebe8..7de9f329 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ pytest-mock = "^3.14.0" optional = true [tool.poetry.group.pre-commit.dependencies] pre-commit = "^4.0.1" -ruff = "^0.9.6" +ruff = "^0.11.2" [tool.poetry.extras] http = ["requests"] From 2a8d6a0ad322e00f7bf18c08459efa351fd10483 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 28 Mar 2025 13:06:52 +0000 Subject: [PATCH 111/112] Raise errors rather than exit --- abcd/frontends/commandline/decorators.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/abcd/frontends/commandline/decorators.py b/abcd/frontends/commandline/decorators.py index a6f6eaa9..73c44c77 100644 --- a/abcd/frontends/commandline/decorators.py +++ b/abcd/frontends/commandline/decorators.py @@ -24,12 +24,10 @@ def wrapper(*args, config, **kwargs): use_ssl = config.get("use_ssl", None) if url is None: - print("Please use abcd login first!") - exit(1) + raise ConnectionError("Please use abcd login first!") if use_ssl is None: - print("use_ssl has not been saved. Please login again") - exit(1) + raise ConnectionError("use_ssl has not been saved. 
Please login again") db = ABCD.from_url(url=url, use_ssl=use_ssl) @@ -52,8 +50,9 @@ def check_remote(func): @functools.wraps(func) def wrapper(*args, **kwargs): if kwargs.pop("remote"): - print("In read only mode, you can't modify the data in the database") - exit(1) + raise PermissionError( + "In read only mode, you can't modify the data in the database" + ) func(*args, **kwargs) From 752984ebcdb6254a7541df7fe0fbba2fa9b483ed Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Mon, 31 Mar 2025 18:11:14 +0100 Subject: [PATCH 112/112] Update README for OpenSearch password --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 882f47fe..cb9d170e 100644 --- a/README.md +++ b/README.md @@ -95,26 +95,26 @@ The above login command will place create an `~/.abcd` file with the following c ``` ### OpenSearch -If you have an already running OpenSearch server, or install your own, then you are ready to go. Alternatively, +If you have an already running OpenSearch server, or installed your own, then you are ready to go. Alternatively, ```sh sudo swapoff -a # optional sudo sysctl -w vm.swappiness=1 # optional sudo sysctl -w fs.file-max=262144 # optional sudo sysctl -w vm.max_map_count=262144 -docker run -d --rm --name abcd-opensearch -v :/data/db -p 9200:9200 --env discovery.type=single-node -it opensearchproject/opensearch:latest +docker run -d --name abcd-opensearch -v :/data/db -p 9200:9200 -e "discovery.type=single-node" -e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=" -it opensearchproject/opensearch:latest ``` will download and install an OpenSearch image and run it. 
The connection can be tested with: ```sh -curl -vvv -s --insecure -u admin:admin --fail https://localhost:9200 +curl -vvv -s --insecure -u admin:<password> --fail https://localhost:9200 ``` To connect to an OpenSearch database that is already running, use ```sh -abcd login opensearch://username:password@localhost +abcd login opensearch://<username>:<password>@localhost ``` ## Remote access