diff --git a/.github/workflows/dev-build.yml b/.github/workflows/dev-build.yml index baa8c0c..090bb88 100644 --- a/.github/workflows/dev-build.yml +++ b/.github/workflows/dev-build.yml @@ -1,64 +1,83 @@ - -# This worklflow will perform following actions when the code is pushed to the development branch: -# - Build the latest docker image in development which needs test to pass first. -# - Push the docker image to Docker Hub under namespace - nfdi4chem with tag:dev-latest. -# +# This workflow will perform following actions when code is pushed to the development branch: +# - Run tests and linting checks (can be enabled via needs: test_and_lint) +# - Build and push nmrKit Docker image with layer caching for faster builds +# - Conditionally build nmr-cli image only if files in app/scripts/nmr-cli/ changed +# - Push images to Docker Hub under namespace nfdi4chem with dev-latest tag +# - Prevent redundant builds using concurrency control +# # Maintainers: # - name: Nisha Sharma # - email: nisha.sharma@uni-jena.de -name : Dev Build, Test and Publish +name: Prod Build and Publish to Dev on: push: branches: [development] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + env: - DOCKER_HUB_USERNAME : ${{ secrets.DOCKER_USERNAME }} - DOCKER_HUB_PASSWORD : ${{ secrets.DOCKER_PASSWORD }} NMRKIT_REPOSITORY_NAME: nmrkit NMR_CLI_REPOSITORY_NAME: nmr-cli REPOSITORY_NAMESPACE: nfdi4chem RELEASE_TAG: dev-latest jobs: - test_and_lint: - uses: ./.github/workflows/test.yml - - push_to_registry: + # test_and_lint: + # uses: NFDI4Chem/nmrkit/.github/workflows/test.yml@main + build_and_push_to_registry: name: Push Docker image to Docker Hub runs-on: ubuntu-latest - needs: test_and_lint + # needs: test_and_lint steps: + # Clone repository code to runner - name: Check out the repo uses: actions/checkout@v4 - + + # Enable advanced Docker build features (required for caching) + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # 
Authenticate with Docker Hub for image push access - name: Log in to Docker Hub - uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + # Detect changes in nmr-cli folder to skip unnecessary builds + - name: Check for file changes + id: changes + uses: dorny/paths-filter@v3 with: - username: ${{ env.DOCKER_HUB_USERNAME }} - password: ${{ env.DOCKER_HUB_PASSWORD }} - + filters: | + nmr-cli: + - 'app/scripts/nmr-cli/**' + + # Build main nmrKit image with registry caching for faster builds - name: Build and push nmrKit Docker image - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v6 with: context: . file: ./Dockerfile push: true - build-args: | - RELEASE_VERSION=dev-latest + build-args: RELEASE_VERSION=${{ env.RELEASE_TAG }} tags: ${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMRKIT_REPOSITORY_NAME }}:${{ env.RELEASE_TAG }} - username: ${{ env.DOCKER_HUB_USERNAME }} - password: ${{ env.DOCKER_HUB_PASSWORD }} + cache-from: type=registry,ref=${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMRKIT_REPOSITORY_NAME }}:buildcache + cache-to: type=registry,ref=${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMRKIT_REPOSITORY_NAME }}:buildcache,mode=max + # Build nmr-cli image only if files in app/scripts/nmr-cli/ changed - name: Build and push nmr-cli Docker image - uses: docker/build-push-action@v4 + if: steps.changes.outputs.nmr-cli == 'true' + uses: docker/build-push-action@v6 with: context: ./app/scripts/nmr-cli/ file: ./app/scripts/nmr-cli/Dockerfile push: true - build-args: | - RELEASE_VERSION=dev-latest + build-args: RELEASE_VERSION=${{ env.RELEASE_TAG }} tags: ${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMR_CLI_REPOSITORY_NAME }}:${{ env.RELEASE_TAG }} - username: ${{ env.DOCKER_HUB_USERNAME }} - password: ${{ env.DOCKER_HUB_PASSWORD }} \ No newline at end of file + cache-from: type=registry,ref=${{ env.REPOSITORY_NAMESPACE }}/${{ 
env.NMR_CLI_REPOSITORY_NAME }}:buildcache + cache-to: type=registry,ref=${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMR_CLI_REPOSITORY_NAME }}:buildcache,mode=max diff --git a/.github/workflows/prod-build.yml b/.github/workflows/prod-build.yml index b6c4eea..b99f7a4 100644 --- a/.github/workflows/prod-build.yml +++ b/.github/workflows/prod-build.yml @@ -1,60 +1,120 @@ -# This worklflow will perform following actions when the code is pushed to main branch: -# - Build the latest docker image in main which needs test to pass first. -# - Push the docker image to Docker Hub under namespace - nfdi4chem with tag:[release_version]. +# This workflow will perform following actions when code is pushed to the main branch: +# - Run tests and linting checks (can be enabled via needs: test_and_lint) +# - Build and push nmrKit Docker image with layer caching for faster builds +# - Conditionally build nmr-cli image only if files in app/scripts/nmr-cli/ changed +# - Push images to Docker Hub under namespace nfdi4chem with latest tag +# - Prevent redundant builds using concurrency control # # Maintainers: # - name: Nisha Sharma # - email: nisha.sharma@uni-jena.de -name : Prod Build, Test and Publish +name : Prod Build and Publish +# Runs on manual workflow_dispatch with confirmation on: - release: - types: [published] + workflow_dispatch: + inputs: + confirm: + description: "Type 'DEPLOY' to confirm production deployment" + required: true + type: string + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true env: - DOCKER_HUB_USERNAME : ${{ secrets.DOCKER_USERNAME }} - DOCKER_HUB_PASSWORD : ${{ secrets.DOCKER_PASSWORD }} - REPOSITORY_NAME: nmrkit + NMRKIT_REPOSITORY_NAME: nmrkit + NMR_CLI_REPOSITORY_NAME: nmr-cli REPOSITORY_NAMESPACE: nfdi4chem + RELEASE_TAG: latest jobs: - push_to_registry: + # test_and_lint: + # uses: NFDI4Chem/nmrkit/.github/workflows/test.yml@main + + # Guard: confirm input and authorize actor + guard: + name: Access control and 
confirmation + runs-on: ubuntu-latest + steps: + - name: Validate actor and confirmation + shell: bash + run: | + echo "Actor: ${GITHUB_ACTOR}" + # Confirm input (workflow_dispatch) + if [[ "${{ github.event.inputs.confirm }}" != "DEPLOY" ]]; then + echo "Confirmation token mismatch. Expected 'DEPLOY'." + exit 1 + fi + + # Authorize actor (comma/space separated list in secret) + AUTHORIZED_ACTORS="${{ secrets.PROD_DEPLOY_AUTHORIZED_ACTORS }}" + allowed=0 + for u in ${AUTHORIZED_ACTORS//,/ }; do + if [[ "${u,,}" == "${GITHUB_ACTOR,,}" ]]; then + allowed=1 + break + fi + done + if [[ $allowed -ne 1 ]]; then + echo "User '${GITHUB_ACTOR}' is not authorized to trigger this workflow." + exit 1 + fi + echo "Authorization check passed." + + build_and_push_to_registry: name: Push Docker image to Docker Hub runs-on: ubuntu-latest + needs: guard steps: + # Clone repository code to runner - name: Check out the repo uses: actions/checkout@v4 - - #Fetch Latest release - - name: Fetch latest release - id: fetch-latest-release - uses: InsonusK/get-latest-release@v1.0.1 - with: - myToken: ${{ github.token }} - exclude_types: "draft" - view_top: 10 - - name: "Print release name" - run: | - echo "tag_name: ${{ steps.fetch-latest-release.outputs.tag_name }}" - - #Login to Docker Hub + + # Enable advanced Docker build features (required for caching) + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # Authenticate with Docker Hub for image push access - name: Log in to Docker Hub - uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a + uses: docker/login-action@v3 with: - username: ${{ env.DOCKER_HUB_USERNAME }} - password: ${{ env.DOCKER_HUB_PASSWORD }} - - #Build and push Docker image - - name: Build and push Docker image - uses: docker/build-push-action@v4 + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + # Detect changes in nmr-cli folder to skip unnecessary builds + - name: Check for file changes + id: 
changes + uses: dorny/paths-filter@v3 + with: + filters: | + nmr-cli: + - 'app/scripts/nmr-cli/**' + + # Build main nmrKit image with registry caching for faster builds + - name: Build and push nmrKit Docker image + uses: docker/build-push-action@v6 with: context: . file: ./Dockerfile push: true - build-args: | - RELEASE_VERSION=${{ steps.fetch-latest-release.outputs.tag_name }} - tags: ${{ env.REPOSITORY_NAMESPACE }}/${{ env.REPOSITORY_NAME }}:${{ steps.fetch-latest-release.outputs.tag_name }} - username: ${{ env.DOCKER_HUB_USERNAME }} - password: ${{ env.DOCKER_HUB_PASSWORD }} \ No newline at end of file + build-args: RELEASE_VERSION=${{ env.RELEASE_TAG }} + tags: ${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMRKIT_REPOSITORY_NAME }}:${{ env.RELEASE_TAG }} + cache-from: type=registry,ref=${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMRKIT_REPOSITORY_NAME }}:buildcache + cache-to: type=registry,ref=${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMRKIT_REPOSITORY_NAME }}:buildcache,mode=max + + # Build nmr-cli image only if files in app/scripts/nmr-cli/ changed + - name: Build and push nmr-cli Docker image + if: steps.changes.outputs.nmr-cli == 'true' + uses: docker/build-push-action@v6 + with: + context: ./app/scripts/nmr-cli/ + file: ./app/scripts/nmr-cli/Dockerfile + push: true + build-args: RELEASE_VERSION=${{ env.RELEASE_TAG }} + tags: ${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMR_CLI_REPOSITORY_NAME }}:${{ env.RELEASE_TAG }} + cache-from: type=registry,ref=${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMR_CLI_REPOSITORY_NAME }}:buildcache + cache-to: type=registry,ref=${{ env.REPOSITORY_NAMESPACE }}/${{ env.NMR_CLI_REPOSITORY_NAME }}:buildcache,mode=max \ No newline at end of file diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 1f774ca..9ab91ec 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -1,31 +1,20 @@ - -# This worklflow will perform following actions when the code is pushed to main 
branch. -# - Test linting with pylint. -# - Test the code with pytest. -# - Trigger release-please action to create release which needs test to pass first. -# -# Maintainers: -# - name: Nisha Sharma -# - email: nisha.sharma@uni-jena.de - -name: release-please-action +name: Release Please on: push: branches: - main + workflow_dispatch: {} jobs: - test_and_lint: - uses: NFDI4Chem/nmrkit/.github/workflows/test.yml@main - release-please: runs-on: ubuntu-latest - needs: test_and_lint + permissions: + contents: write + pull-requests: write steps: - - uses: google-github-actions/release-please-action@v3 + - uses: googleapis/release-please-action@v4.2.0 with: release-type: python - package-name: release-please-action - token: ${{ secrets.PAT }} - prerelease: true \ No newline at end of file + target-branch: main + token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/app/main.py b/app/main.py index 9160d18..b9fdb6f 100644 --- a/app/main.py +++ b/app/main.py @@ -13,9 +13,62 @@ from prometheus_fastapi_instrumentator import Instrumentator from app.schemas import HealthCheck +DESCRIPTION = """ +## NMR Kit API + +A Python-based microservice for **storing**, **parsing**, **converting**, and +**predicting** NMR (Nuclear Magnetic Resonance) spectra. 
+ +### Modules + +| Module | Description | +|--------|-------------| +| **Chemistry** | Generate HOSE codes, label atoms via ALATIS | +| **Spectra** | Parse NMR spectra from files or URLs | +| **Converter** | Convert NMR raw data to NMRium JSON | +| **Predict** | Predict NMR spectra using nmrdb.org or nmrshift engines | +| **Registration** | Register and query molecules via lwreg | + +### Links + +* [Documentation](https://nfdi4chem.github.io/nmrkit) +* [Source Code](https://github.com/NFDI4Chem/nmrkit) +""" + +tags_metadata = [ + { + "name": "healthcheck", + "description": "Health check endpoints to verify service availability.", + }, + { + "name": "chem", + "description": "Chemistry operations including HOSE code generation and atom labeling.", + }, + { + "name": "spectra", + "description": "Parse NMR spectra from uploaded files or remote URLs.", + }, + { + "name": "converter", + "description": "Convert NMR raw data into NMRium-compatible JSON format.", + }, + { + "name": "predict", + "description": ( + "Predict NMR spectra from molecular structures using " + "**nmrdb.org** or **nmrshift** prediction engines." 
+ ), + }, + { + "name": "registration", + "description": "Register, query, and retrieve molecules using the lwreg registration system.", + }, +] + app = FastAPI( title=config.PROJECT_NAME, - description="Python-based microservice to store and predict spectra.", + version=config.VERSION, + description=DESCRIPTION, terms_of_service="https://nfdi4chem.github.io/nmrkit", contact={ "name": "Steinbeck Lab", @@ -26,6 +79,7 @@ "name": "CC BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/", }, + openapi_tags=tags_metadata, ) app.include_router(registration.router) @@ -42,6 +96,7 @@ version_format="{major}", prefix_format="/v{major}", enable_latest=True, + description=DESCRIPTION, terms_of_service="https://nfdi4chem.github.io/nmrkit", contact={ "name": "Steinbeck Lab", @@ -52,6 +107,7 @@ "name": "CC BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/", }, + openapi_tags=tags_metadata, ) Instrumentator().instrument(app).expose(app) diff --git a/app/routers/chem.py b/app/routers/chem.py index 5a4f803..d7c8c00 100644 --- a/app/routers/chem.py +++ b/app/routers/chem.py @@ -1,11 +1,9 @@ from typing import Annotated from psycopg2.errors import UniqueViolation from app.modules.cdkmodules import getCDKHOSECodes -from fastapi import APIRouter, HTTPException, status, Query, Body +from fastapi import APIRouter, HTTPException, status, Query from app.modules.rdkitmodules import getRDKitHOSECodes from app.schemas import HealthCheck -from app.schemas.alatis import AlatisModel -import requests router = APIRouter( prefix="/chem", @@ -41,23 +39,91 @@ def get_health() -> HealthCheck: @router.get( "/hosecode", tags=["chem"], - summary="Generates HOSE codes of molecule", + summary="Generate HOSE codes for a molecule", + description=( + "Generate **Hierarchically Ordered Spherical Environment (HOSE)** codes " + "for every atom in the given molecule. 
HOSE codes encode the local chemical " + "environment around each atom up to a configurable number of spheres.\n\n" + "Supports two cheminformatics frameworks:\n" + "- **CDK** (Chemistry Development Kit) — default, supports stereo\n" + "- **RDKit** — alternative implementation" + ), response_model=list[str], - response_description="Returns an array of hose codes generated", + response_description="Array of HOSE code strings, one per atom in the molecule", status_code=status.HTTP_200_OK, + responses={ + 200: { + "description": "Successfully generated HOSE codes", + "content": { + "application/json": { + "example": [ + "C(CC,CC,&)", + "C(CC,C&,&)", + "C(CC,CC,&)", + "C(CCC,CC&,&)", + "C(CC,CC,CC)", + "C(CC,CC,CC)", + ] + } + }, + }, + 409: {"description": "Molecule already exists (unique constraint violation)"}, + 422: {"description": "Error parsing the molecular structure"}, + }, ) async def HOSE_Codes( - smiles: Annotated[str, Query(examples=["CCCC1CC1"])], - framework: Annotated[str, Query(enum=["cdk", "rdkit"])] = "cdk", - spheres: Annotated[int, Query()] = 3, - usestereo: Annotated[bool, Query()] = False, + smiles: Annotated[ + str, + Query( + description="SMILES string representing the molecular structure", + example="CCCC1CC1", + examples=[ + "CCCC1CC1", + "c1ccccc1", + "CC(=O)O", + "CCO", + "C1CCCCC1", + "CC(=O)Oc1ccccc1C(=O)O", + ], + ), + ], + framework: Annotated[ + str, + Query( + enum=["cdk", "rdkit"], + description="Cheminformatics framework to use for HOSE code generation", + ), + ] = "cdk", + spheres: Annotated[ + int, + Query( + description="Number of spheres (bond distance) to consider around each atom", + ge=1, + le=10, + ), + ] = 3, + usestereo: Annotated[ + bool, + Query( + description="Whether to include stereochemistry information in HOSE codes (CDK only)", + ), + ] = False, ) -> list[str]: """ - ## Generates HOSE codes for a given molecule - Endpoint to generate HOSE codes based on each atom in the given molecule. 
+ ## Generate HOSE codes for a given molecule - Returns: - HOSE Codes: An array of hose codes generated + Generates HOSE (Hierarchically Ordered Spherical Environment) codes based on + each atom in the given molecule. These codes are widely used in NMR chemical + shift prediction. + + ### Parameters + - **smiles**: A valid SMILES string (e.g. `CCCC1CC1`) + - **framework**: Choose `cdk` (default) or `rdkit` + - **spheres**: Number of bond spheres to encode (default: 3) + - **usestereo**: Include stereochemistry in codes (CDK only, default: false) + + ### Returns + An array of HOSE code strings, one for each atom in the molecule. """ try: if framework == "cdk": @@ -78,32 +144,3 @@ async def HOSE_Codes( detail="Error parsing the structure " + e.message, headers={"X-Error": "RDKit molecule input parse error"}, ) - - -@router.post( - "/label-atoms", - tags=["chem"], - summary="Label atoms using ALATIS naming system", - response_model=AlatisModel, - response_description="", - status_code=status.HTTP_200_OK, -) -async def label_atoms(data: Annotated[str, Body(embed=False, media_type="text/plain")]): - """ - ## Generates atom labels for a given molecule - - Returns: - JSON with various representations - """ - try: - url = "http://alatis.nmrfam.wisc.edu/upload" - payload = {"input_text": data, "format": "format_", "response_type": "json"} - response = requests.request("POST", url, data=payload) - response.raise_for_status() # Raise an error for bad status codes - return response.json() - except Exception as e: - raise HTTPException( - status_code=422, - detail=f"Error parsing the structure: {str(e)}", - headers={"X-Error": "RDKit molecule input parse error"}, - ) diff --git a/app/routers/converter.py b/app/routers/converter.py index 23cd4dd..bae9e78 100644 --- a/app/routers/converter.py +++ b/app/routers/converter.py @@ -1,5 +1,5 @@ import subprocess -from fastapi import APIRouter, HTTPException, status, Response +from fastapi import APIRouter, HTTPException, status, 
Response, Query from app.schemas import HealthCheck router = APIRouter( @@ -36,17 +36,42 @@ def get_health() -> HealthCheck: @router.get( "/spectra", tags=["converter"], - summary="Load and convert NMR raw data", - # response_model=List[int], - response_description="Load and convert NMR raw data", + summary="Convert NMR raw data to NMRium JSON", + description=( + "Fetch NMR raw data from a remote URL and convert it into " + "[NMRium](https://www.nmrium.org/)-compatible JSON format. " + "The conversion is performed by the **nmr-cli** tool running " + "inside a Docker container.\n\n" + "Supported input formats include Bruker, JCAMP-DX, and other " + "formats recognized by nmr-cli." + ), + response_description="NMRium-compatible JSON representation of the NMR data", status_code=status.HTTP_200_OK, + responses={ + 200: { + "description": "Successfully converted NMR data to NMRium JSON", + "content": {"application/json": {}}, + }, + 500: {"description": "Conversion failed or Docker container not available"}, + }, ) -async def nmr_load_save(url: str): +async def nmr_load_save( + url: str = Query( + ..., + description="URL pointing to the NMR raw data file to convert", + examples=["https://example.com/nmr-data/sample.zip"], + ), +): """ - ## Return nmrium json + ## Convert NMR raw data to NMRium JSON - Returns: - Return nmrium json + Fetches NMR raw data from the provided URL and converts it into NMRium JSON format. + + ### Parameters + - **url**: A publicly accessible URL pointing to the NMR raw data + + ### Returns + NMRium-compatible JSON object containing the converted spectra data. 
""" process = subprocess.Popen( ["docker exec nmr-converter nmr-cli -u " + url], diff --git a/app/routers/predict.py b/app/routers/predict.py index a64bb3b..cf7fa56 100644 --- a/app/routers/predict.py +++ b/app/routers/predict.py @@ -1,19 +1,454 @@ -import subprocess -from fastapi import APIRouter, HTTPException, status, Response, Body +from fastapi import APIRouter, HTTPException, status, UploadFile, File, Form from app.schemas import HealthCheck -from app.schemas.respredict_response_schema import ResPredictModel -from app.schemas.error import ErrorResponse, BadRequestModel, NotFoundModel -import uuid -from typing import Annotated +from pydantic import BaseModel, Field, model_validator +from typing import Annotated, List, Literal, Optional, Union +from enum import Enum +import subprocess +import json +import tempfile import os +import uuid +import time +from pathlib import Path router = APIRouter( prefix="/predict", tags=["predict"], dependencies=[], - responses={404: {"description": "Not Found"}}, + responses={ + 404: {"description": "Not Found"}, + 408: {"description": "Prediction timed out"}, + 422: {"description": "Invalid input or NMR CLI error"}, + 500: {"description": "Docker or nmr-converter container not available"}, + }, ) +# Container name for nmr-cli (from docker-compose.yml) +NMR_CLI_CONTAINER = "nmr-converter" +SHARED_VOLUME_PATH = "/shared" + + +# ============================================================================ +# ENUMS +# ============================================================================ + + +class SpectraType(str, Enum): + PROTON = "proton" + CARBON = "carbon" + COSY = "cosy" + HSQC = "hsqc" + HMBC = "hmbc" + + +class PeakShape(str, Enum): + GAUSSIAN = "gaussian" + LORENTZIAN = "lorentzian" + + +class Solvent(str, Enum): + ANY = "Any" + CHLOROFORM = "Chloroform-D1 (CDCl3)" + DMSO = "Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)" + METHANOL = "Methanol-D4 (CD3OD)" + D2O = "Deuteriumoxide (D2O)" + ACETONE = "Acetone-D6 ((CD3)2CO)" + 
CCL4 = "TETRACHLORO-METHANE (CCl4)" + PYRIDINE = "Pyridin-D5 (C5D5N)" + BENZENE = "Benzene-D6 (C6D6)" + NEAT = "neat" + THF = "Tetrahydrofuran-D8 (THF-D8, C4D4O)" + + +# ============================================================================ +# ENGINE-SPECIFIC OPTIONS +# ============================================================================ + + +class FromTo(BaseModel): + """Range with from/to values in ppm""" + from_: float = Field(..., alias="from", description="From in ppm") + to: float = Field(..., description="To in ppm") + + model_config = {"populate_by_name": True} + + +class NbPoints2D(BaseModel): + x: int = Field(default=1024, description="2D spectrum X-axis points") + y: int = Field(default=1024, description="2D spectrum Y-axis points") + + +class Options1D(BaseModel): + """1D spectrum generation options for nmrdb.org""" + proton: FromTo = Field( + default=FromTo.model_validate({"from": -1, "to": 12}), + description="Proton (1H) range in ppm", + ) + carbon: FromTo = Field( + default=FromTo.model_validate({"from": -5, "to": 220}), + description="Carbon (13C) range in ppm", + ) + nbPoints: int = Field(default=2**17, description="1D number of points") + lineWidth: float = Field(default=1, description="1D line width") + + model_config = {"populate_by_name": True} + + +class Options2D(BaseModel): + """2D spectrum generation options for nmrdb.org""" + nbPoints: NbPoints2D = Field( + default_factory=NbPoints2D, + description="2D number of points", + ) + + +class NmrdbOptions(BaseModel): + """Options for the nmrdb.org prediction engine""" + name: str = Field(default="", description="Compound name") + frequency: float = Field(default=400, description="NMR frequency (MHz)") + one_d: Options1D = Field( + alias="1d", + default_factory=Options1D, + description="1D spectrum options", + ) + two_d: Options2D = Field( + alias="2d", + default_factory=Options2D, + description="2D spectrum options", + ) + autoExtendRange: bool = Field( + default=True, + 
description="Auto extend range if signals fall outside", + ) + + model_config = {"populate_by_name": True} + + +class NmrshiftOptions(BaseModel): + """Options for the nmrshift prediction engine""" + id: int = Field(default=1, description="Input ID") + shifts: str = Field(default="1", description="Chemical shifts") + solvent: Solvent = Field(default=Solvent.DMSO, description="NMR solvent") + from_ppm: Optional[float] = Field( + default=None, + alias="from", + description="From in (ppm) for spectrum generation", + ) + to_ppm: Optional[float] = Field( + default=None, + alias="to", + description="To in (ppm) for spectrum generation", + ) + nbPoints: int = Field(default=1024, description="Number of points") + lineWidth: float = Field(default=1, description="Line width") + frequency: float = Field(default=400, description="NMR frequency (MHz)") + tolerance: float = Field( + default=0.001, description="Tolerance to group peaks") + peakShape: PeakShape = Field( + default=PeakShape.LORENTZIAN, description="Peak shape") + + model_config = {"populate_by_name": True} + + +# ============================================================================ +# REQUEST MODELS +# ============================================================================ + + +NMRDB_SUPPORTED_SPECTRA = {"proton", "carbon", "cosy", "hsqc", "hmbc"} +NMRSHIFT_SUPPORTED_SPECTRA = {"proton", "carbon"} + + +class NmrdbPredictRequest(BaseModel): + """Prediction request using the nmrdb.org engine""" + engine: Literal["nmrdb.org"] = Field(..., description="Prediction engine") + structure: str = Field(..., description="MOL file content") + spectra: List[SpectraType] = Field(..., + description="Spectra types", min_length=1) + options: NmrdbOptions = Field(default_factory=NmrdbOptions) + + @model_validator(mode="after") + def validate_spectra(self): + unsupported = [ + s.value for s in self.spectra if s.value not in NMRDB_SUPPORTED_SPECTRA] + if unsupported: + raise ValueError( + f"nmrdb.org does not support: 
{unsupported}. " + f"Supported: {sorted(NMRDB_SUPPORTED_SPECTRA)}" + ) + return self + + +class NmrshiftPredictRequest(BaseModel): + """Prediction request using the nmrshift engine""" + engine: Literal["nmrshift"] = Field(..., description="Prediction engine") + structure: str = Field(..., description="MOL file content") + spectra: List[SpectraType] = Field(..., + description="Spectra types", min_length=1) + options: NmrshiftOptions = Field(default_factory=NmrshiftOptions) + + @model_validator(mode="after") + def validate_spectra(self): + unsupported = [ + s.value for s in self.spectra if s.value not in NMRSHIFT_SUPPORTED_SPECTRA] + if unsupported: + raise ValueError( + f"nmrshift does not support: {unsupported}. " + f"Supported: {sorted(NMRSHIFT_SUPPORTED_SPECTRA)}" + ) + return self + + +# File upload request models - same options as structure models +class NmrdbFileRequest(BaseModel): + """File upload prediction request using the nmrdb.org engine""" + engine: Literal["nmrdb.org"] = Field(..., description="Prediction engine") + spectra: List[SpectraType] = Field(..., + description="Spectra types", min_length=1) + options: NmrdbOptions = Field(default_factory=NmrdbOptions) + + @model_validator(mode="after") + def validate_spectra(self): + unsupported = [ + s.value for s in self.spectra if s.value not in NMRDB_SUPPORTED_SPECTRA] + if unsupported: + raise ValueError( + f"nmrdb.org does not support: {unsupported}. 
" + f"Supported: {sorted(NMRDB_SUPPORTED_SPECTRA)}" + ) + return self + + +class NmrshiftFileRequest(BaseModel): + """File upload prediction request using the nmrshift engine""" + engine: Literal["nmrshift"] = Field(..., description="Prediction engine") + spectra: List[SpectraType] = Field(..., + description="Spectra types", min_length=1) + options: NmrshiftOptions = Field(default_factory=NmrshiftOptions) + + @model_validator(mode="after") + def validate_spectra(self): + unsupported = [ + s.value for s in self.spectra if s.value not in NMRSHIFT_SUPPORTED_SPECTRA] + if unsupported: + raise ValueError( + f"nmrshift does not support: {unsupported}. " + f"Supported: {sorted(NMRSHIFT_SUPPORTED_SPECTRA)}" + ) + return self + + +PredictRequest = Annotated[ + Union[NmrdbPredictRequest, NmrshiftPredictRequest], + Field(discriminator="engine"), +] + +FileRequest = Annotated[ + Union[NmrdbFileRequest, NmrshiftFileRequest], + Field(discriminator="engine"), +] + + +# ============================================================================ +# CLI BUILDERS +# ============================================================================ + + +def build_nmrdb_args(options: NmrdbOptions, spectra: List[SpectraType]) -> list[str]: + """Build CLI arguments for nmrdb.org""" + one_d = options.one_d + two_d = options.two_d + + args = [ + "--engine", "nmrdb.org", + "--spectra", *[s.value for s in spectra], + "--frequency", str(options.frequency), + "--protonFrom", str(one_d.proton.from_), + "--protonTo", str(one_d.proton.to), + "--carbonFrom", str(one_d.carbon.from_), + "--carbonTo", str(one_d.carbon.to), + "--nbPoints1d", str(one_d.nbPoints), + "--lineWidth", str(one_d.lineWidth), + "--nbPoints2dX", str(two_d.nbPoints.x), + "--nbPoints2dY", str(two_d.nbPoints.y), + ] + + if options.name: + args.extend(["--name", options.name]) + if not options.autoExtendRange: + args.append("--no-autoExtendRange") + + return args + + +def build_nmrshift_args(options: NmrshiftOptions, spectra: 
List[SpectraType]) -> list[str]: + """Build CLI arguments for nmrshift""" + args = [ + "--engine", "nmrshift", + "--spectra", *[s.value for s in spectra], + "--id", str(options.id), + "--shifts", options.shifts, + "--solvent", options.solvent.value, + "--nbPoints", str(options.nbPoints), + "--lineWidth", str(options.lineWidth), + "--frequency", str(options.frequency), + "--tolerance", str(options.tolerance), + "--peakShape", options.peakShape.value, + ] + + if options.from_ppm is not None: + args.extend(["--from", str(options.from_ppm)]) + if options.to_ppm is not None: + args.extend(["--to", str(options.to_ppm)]) + + return args + + +def build_cli_args(request: Union[NmrdbPredictRequest, NmrshiftPredictRequest, + NmrdbFileRequest, NmrshiftFileRequest]) -> list[str]: + """Build CLI args from any request type""" + if isinstance(request, (NmrdbPredictRequest, NmrdbFileRequest)): + return build_nmrdb_args(request.options, request.spectra) + elif isinstance(request, (NmrshiftPredictRequest, NmrshiftFileRequest)): + return build_nmrshift_args(request.options, request.spectra) + else: + raise HTTPException( + status_code=400, detail=f"Unknown engine type: {type(request)}") + + +# ============================================================================ +# HELPERS +# ============================================================================ + + +def copy_file_to_container(local_path: str, container_path: str) -> None: + """Copy file to container""" + try: + subprocess.run( + ["docker", "cp", local_path, + f"{NMR_CLI_CONTAINER}:{container_path}"], + check=True, + capture_output=True, + timeout=30 + ) + except subprocess.CalledProcessError as e: + error_msg = e.stderr.decode("utf-8") if e.stderr else "Unknown error" + raise HTTPException( + status_code=500, detail=f"Failed to copy file: {error_msg}") + + +def remove_file_from_container(container_path: str) -> None: + """Remove file from container""" + try: + subprocess.run( + ["docker", "exec", NMR_CLI_CONTAINER, "rm", 
"-f", container_path], + capture_output=True, + timeout=10 + ) + except Exception: + pass + + +def execute_cli(cmd: list[str], engine: str) -> dict: + """Execute CLI command and return parsed JSON""" + timeout = 300 if engine == "nmrdb.org" else 120 + start_time = time.time() + + try: + result = subprocess.run( + ["docker", "exec", NMR_CLI_CONTAINER] + cmd, + capture_output=True, + text=False, + timeout=timeout + ) + except subprocess.TimeoutExpired: + raise HTTPException( + status_code=408, + detail={ + "message": f"Prediction timed out after {timeout}s", + "engine": engine, + "hint": "nmrdb.org predictions can take 30-60s, try again or use nmrshift for faster results", + } + ) + except FileNotFoundError: + raise HTTPException( + status_code=500, + detail={ + "message": "Docker not found or nmr-converter container is not running", + "hint": "Run: docker compose up -d", + } + ) + + elapsed = round(time.time() - start_time, 2) + stdout = result.stdout.decode("utf-8", errors="replace").strip() + stderr = result.stderr.decode("utf-8", errors="replace").strip() + + if result.returncode != 0: + raise HTTPException( + status_code=422, + detail={ + "message": "NMR CLI command failed", + "engine": engine, + "exit_code": result.returncode, + "error": stderr or "No error output from CLI", + "elapsed_seconds": elapsed, + } + ) + + if not stdout: + raise HTTPException( + status_code=500, + detail={ + "message": "NMR CLI returned empty output", + "engine": engine, + "exit_code": result.returncode, + "stderr": stderr or "No error output from CLI", + "elapsed_seconds": elapsed, + "hint": "Check that all required CLI arguments are valid", + } + ) + + # Strip any warning/info messages printed before the JSON output + json_start = stdout.find('{') + if json_start > 0: + warnings = stdout[:json_start].strip() + print(f"[WARN] CLI warnings before JSON: {warnings}") + stdout = stdout[json_start:] + + try: + return json.loads(stdout) + except json.JSONDecodeError as e: + raise 
HTTPException( + status_code=500, + detail={ + "message": "NMR CLI returned invalid JSON", + "engine": engine, + "parse_error": str(e), + "stdout_preview": stdout[:500], + "stderr": stderr or "No error output from CLI", + "elapsed_seconds": elapsed, + } + ) + + +def run_predict_command(structure: str, cli_args: list[str], engine: str) -> dict: + """Execute nmr-cli predict with structure string""" + # CRITICAL: Escape newlines for CLI + structure_escaped = structure.replace('\n', '\\n') + cmd = ["nmr-cli", "predict", "-s", structure_escaped] + cli_args + return execute_cli(cmd, engine) + + +def run_predict_command_with_file(file_path: str, cli_args: list[str], engine: str) -> dict: + """Execute nmr-cli predict with file""" + cmd = ["nmr-cli", "predict", "--file", file_path] + cli_args + return execute_cli(cmd, engine) + + +# ============================================================================ +# HEALTH CHECK +# ============================================================================ + @router.get("/", include_in_schema=False) @router.get( @@ -26,120 +461,234 @@ response_model=HealthCheck, ) def get_health() -> HealthCheck: - """ - ## Perform a Health Check - Endpoint to perform a healthcheck on. This endpoint can primarily be used by Docker - to ensure a robust container orchestration and management are in place. Other - services that rely on the proper functioning of the API service will not deploy if this - endpoint returns any other HTTP status code except 200 (OK). 
- Returns: - HealthCheck: Returns a JSON response with the health status - """ + """Health check endpoint""" return HealthCheck(status="OK") +# ============================================================================ +# ENDPOINTS +# ============================================================================ + + @router.post( - "/respredict", - summary="", + "/", + tags=["predict"], + summary="Predict NMR spectra from a MOL string", + description=( + "Submit a molecular structure as a MOL block string and predict NMR spectra " + "using one of the supported prediction engines.\n\n" + "### Supported Engines\n\n" + "| Engine | Spectra Types | Typical Time |\n" + "|--------|--------------|-------------|\n" + "| **nmrshift** | proton, carbon | ~5-10s |\n" + "| **nmrdb.org** | proton, carbon, cosy, hsqc, hmbc | ~30-60s |\n\n" + "> **Note:** nmrdb.org predictions can take 30-60 seconds. " + "Consider using curl or Postman instead of the Swagger UI for long-running requests." + ), + response_description="Predicted spectra in NMRium-compatible JSON format", + status_code=status.HTTP_200_OK, responses={ - 200: { - "description": "Successful response", - "model": ResPredictModel, - }, - 400: {"description": "Bad Request", "model": BadRequestModel}, - 404: {"description": "Not Found", "model": NotFoundModel}, - 422: {"description": "Unprocessable Entity", "model": ErrorResponse}, + 200: {"description": "Successfully predicted NMR spectra"}, + 400: {"description": "Unknown engine type"}, + 408: {"description": "Prediction timed out (nmrdb.org: 300s, nmrshift: 120s)"}, + 422: {"description": "Invalid structure or NMR CLI error"}, + 500: {"description": "Docker or nmr-converter container not available"}, }, ) -async def predict_mol( - data: Annotated[ - str, - Body( - embed=False, - media_type="text/plain", - openapi_examples={ - "example1": { - "summary": "Example: C", - "value": """ - CDK 09012310592D - - 1 0 0 0 0 0 0 0 0 0999 V2000 - 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 
0 0 0 0 -M END""", - }, +async def predict_from_structure(request: PredictRequest): + """ + ## Predict NMR spectra from a MOL string + + **Note:** nmrdb.org predictions take 30-60s. Use curl/Postman, not Swagger. + + ### Engines + + - **nmrshift** — Supports: `proton`, `carbon` + - **nmrdb.org** — Supports: `proton`, `carbon`, `cosy`, `hsqc`, `hmbc` + + ### Example (nmrshift) + ```json + { + "engine": "nmrshift", + "structure": "\\n Mrv2311...\\nM END", + "spectra": ["proton"], + "options": { + "solvent": "Chloroform-D1 (CDCl3)", + "frequency": 400, + "nbPoints": 1024, + "lineWidth": 1, + "peakShape": "lorentzian" + } + } + ``` + + ### Example (nmrdb.org) + ```json + { + "engine": "nmrdb.org", + "structure": "\\n Mrv2311...\\nM END", + "spectra": ["proton", "carbon"], + "options": { + "name": "Benzene", + "frequency": 400, + "1d": { + "proton": {"from": -1, "to": 12}, + "carbon": {"from": -5, "to": 220}, + "nbPoints": 131072, + "lineWidth": 1 }, + "autoExtendRange": true + } + } + ``` + """ + try: + cli_args = build_cli_args(request) + return run_predict_command(request.structure, cli_args, request.engine) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=422, detail=f"Error: {e}") + + +@router.post( + "/file", + tags=["predict"], + summary="Predict NMR spectra from an uploaded MOL file", + description=( + "Upload a MOL file and predict NMR spectra using one of the supported engines. " + "Engine configuration is passed as a JSON string in the `request` form field.\n\n" + "### Supported Engines\n\n" + "| Engine | Spectra Types | Typical Time |\n" + "|--------|--------------|-------------|\n" + "| **nmrshift** | proton, carbon | ~5-10s |\n" + "| **nmrdb.org** | proton, carbon, cosy, hsqc, hmbc | ~30-60s |\n\n" + "> **Note:** nmrdb.org predictions can take 30-60 seconds. " + "Consider using curl or Postman instead of the Swagger UI for long-running requests." 
+ ), + response_description="Predicted spectra in NMRium-compatible JSON format", + status_code=status.HTTP_200_OK, + responses={ + 200: {"description": "Successfully predicted NMR spectra from file"}, + 400: {"description": "Unknown engine type"}, + 408: {"description": "Prediction timed out (nmrdb.org: 300s, nmrshift: 120s)"}, + 422: {"description": "Invalid JSON in request field or NMR CLI error"}, + 500: {"description": "Docker or nmr-converter container not available"}, + }, +) +async def predict_from_file( + file: UploadFile = File(..., description="MOL file containing the molecular structure"), + request: str = Form( + ..., + description=( + 'JSON string with engine, spectra, and options. ' + 'Example (nmrshift): {"engine": "nmrshift", "spectra": ["proton"], ' + '"options": {"solvent": "Chloroform-D1 (CDCl3)", "frequency": 400, ' + '"nbPoints": 1024, "lineWidth": 1, "peakShape": "lorentzian"}} — ' + 'Example (nmrdb.org): {"engine": "nmrdb.org", "spectra": ["proton", "carbon"], ' + '"options": {"name": "Benzene", "frequency": 400, ' + '"1d": {"proton": {"from": -1, "to": 12}, "carbon": {"from": -5, "to": 220}, ' + '"nbPoints": 131072, "lineWidth": 1}, "autoExtendRange": true}}' ), - ] + ), ): """ - Standardize molblock using the ChEMBL curation pipeline - and return the standardized molecule, SMILES, InChI, and InCHI-Key. + ## Predict NMR spectra from an uploaded MOL file - Parameters: - - **molblock**: The request body containing the "molblock" string representing the molecule to be standardized. + Upload a MOL file and pass engine options as a JSON string in the `request` field. - Returns: - - dict: A dictionary containing the following keys: - - "standardized_mol" (str): The standardized molblock of the molecule. - - "canonical_smiles" (str): The canonical SMILES representation of the molecule. - - "inchi" (str): The InChI representation of the molecule. - - "inchikey" (str): The InChI-Key of the molecule. + > **Note:** nmrdb.org predictions take 30-60s. 
Use curl/Postman, not Swagger. - Raises: - - ValueError: If the SMILES string is not provided or is invalid. + ### nmrshift example `request` field + ```json + { + "engine": "nmrshift", + "spectra": ["proton"], + "options": { + "solvent": "Chloroform-D1 (CDCl3)", + "frequency": 400, + "nbPoints": 1024, + "lineWidth": 1, + "peakShape": "lorentzian" + } + } + ``` + ### nmrdb.org example `request` field + ```json + { + "engine": "nmrdb.org", + "spectra": ["proton", "carbon"], + "options": { + "name": "Benzene", + "frequency": 400, + "1d": { + "proton": {"from": -1, "to": 12}, + "carbon": {"from": -5, "to": 220}, + "nbPoints": 131072, + "lineWidth": 1 + }, + "autoExtendRange": true + } + } + ``` """ + local_file_path = None + container_file_path = None + use_shared_volume = os.path.exists( + SHARED_VOLUME_PATH) and os.access(SHARED_VOLUME_PATH, os.W_OK) + try: - if data: - file_name = "/shared/" + str(uuid.uuid4()) + ".mol" - f = open(file_name, "a") - f.write(data) - f.close() - process = subprocess.Popen( - [ - "docker exec nmr-respredict python3 predict_standalone.py --filename " - + file_name - ], - stdout=subprocess.PIPE, - shell=True, - ) - (output, err) = process.communicate() - process.wait() - if err: - raise HTTPException(status_code=500, detail=err) - else: - if os.path.exists(file_name): - os.remove(file_name) - return Response(content=output, media_type="application/json") + # Parse the JSON request field + try: + request_data = json.loads(request) + except json.JSONDecodeError as e: + raise HTTPException( + status_code=422, detail=f"Invalid JSON in request field: {e}") + + # Validate against the correct model based on engine + engine = request_data.get("engine") + if engine == "nmrdb.org": + parsed_request = NmrdbFileRequest(**request_data) + elif engine == "nmrshift": + parsed_request = NmrshiftFileRequest(**request_data) + else: + raise HTTPException( + status_code=400, detail=f"Unknown engine: {engine}. 
Use 'nmrdb.org' or 'nmrshift'")
+
+        # Build CLI args using same builders as structure endpoint
+        cli_args = build_cli_args(parsed_request)
+
+        # Read and save uploaded file
+        contents = await file.read()
+
+        if use_shared_volume:
+            # FAST: Write directly to shared volume
+            filename = f"predict_{uuid.uuid4().hex[:8]}.mol"
+            local_file_path = os.path.join(SHARED_VOLUME_PATH, filename)
+            container_file_path = f"/shared/{filename}"
+            with open(local_file_path, 'wb') as f:
+                f.write(contents)
+        else:
+            # Fallback: Use temp file + docker cp
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".mol") as tmp_file:
+                tmp_file.write(contents)
+                local_file_path = tmp_file.name
+            container_file_path = f"/tmp/{Path(local_file_path).name}"
+            copy_file_to_container(local_file_path, container_file_path)
+
+        return run_predict_command_with_file(container_file_path, cli_args, parsed_request.engine)
+
+    except HTTPException:
+        raise
     except Exception as e:
-        raise HTTPException(status_code=422, detail=str(e))
-
-
-# @router.get(
-#     "/predict",
-#     tags=["predict"],
-#     summary="Load and convert NMR raw data",
-#     #response_model=List[int],
-#     response_description="Load and convert NMR raw data",
-#     status_code=status.HTTP_200_OK,
-# )
-# async def nmr_respredict(url: str):
-#     """
-#     ## Return nmrium json

-#     Returns:
-#     Return nmrium json
-#     """
-#     file = "/shared" + str(uuid.uuid4()) + ".mol"

-#     process = subprocess.Popen(
-#         ["docker exec nmr-respredict python3 ./predict_standalone.py --filename " + file],
-#         stdout=subprocess.PIPE,
-#         shell=True,
-#     )
-#     (output, err) = process.communicate()
-#     process.wait()
-#     if err:
-#         raise HTTPException(status_code=500, detail=err)
-#     else:
-#         return Response(content=output, media_type="application/json")
+        raise HTTPException(status_code=422, detail=f"Error: {e}")
+    finally:
+        if local_file_path and os.path.exists(local_file_path):
+            try:
+                os.unlink(local_file_path)
+            except Exception:
+                pass
+        if not use_shared_volume and 
container_file_path: + remove_file_from_container(container_file_path) + await file.close() diff --git a/app/routers/registration.py b/app/routers/registration.py index cdf27ab..1f92b28 100644 --- a/app/routers/registration.py +++ b/app/routers/registration.py @@ -1,6 +1,6 @@ from typing import List, Annotated, Union from app.core.config import LWREG_CONFIG -from fastapi import APIRouter, HTTPException, Body, status +from fastapi import APIRouter, HTTPException, Body, Query, status from lwreg.utils import ( initdb, bulk_register, @@ -16,7 +16,10 @@ prefix="/registration", tags=["registration"], dependencies=[], - responses={404: {"description": "Not Found"}}, + responses={ + 404: {"description": "Not Found"}, + 500: {"description": "Internal server error"}, + }, ) @@ -34,7 +37,7 @@ def get_health() -> HealthCheck: """ ## Perform a Health Check Endpoint to perform a healthcheck on. This endpoint can primarily be used by Docker - to ensure a robust container orchestration and management is in place. Other + to ensure a robust container orchestration and management are in place. Other services which rely on the proper functioning of the API service will not deploy if this endpoint returns any other HTTP status code except 200 (OK). Returns: @@ -46,20 +49,42 @@ def get_health() -> HealthCheck: @router.post( "/init", tags=["registration"], - summary="Initializes the registration database", - response_description="Returns boolean indicating the success of the initialisation", + summary="Initialize the registration database", + description=( + "Initialize (or re-initialize) the molecule registration database. " + "**Warning:** This operation destroys all existing data in the registration " + "database. Set `confirm` to `true` to proceed." 
+ ), + response_description="Boolean indicating whether initialization was successful", status_code=status.HTTP_200_OK, response_model=Union[bool, None], + responses={ + 200: { + "description": "Database initialized successfully", + "content": {"application/json": {"example": True}}, + }, + }, ) -async def initialise_database(confirm: Annotated[bool, Body(embed=True)] = False): +async def initialise_database( + confirm: Annotated[ + bool, + Body( + embed=True, + description="Set to true to confirm database initialization. False returns immediately.", + ), + ] = False, +): """ - ## Initializes the registration database + ## Initialize the registration database - NOTE: This call destroys any existing information in the registration database + > **WARNING:** This call destroys any existing information in the registration database. - Arguments: + ### Parameters + - **confirm**: Must be set to `true` to actually perform the initialization. + If `false` (default), the call returns immediately without changes. - confirm -- if set to False we immediately return + ### Returns + `true` if initialization was successful, `null` if confirm was `false`. """ return initdb(config=LWREG_CONFIG, confirm=confirm) @@ -67,23 +92,78 @@ async def initialise_database(confirm: Annotated[bool, Body(embed=True)] = False @router.post( "/register", tags=["registration"], - summary="Registers new molecules", - response_description="Returns the new registry number(s) (molregno). If all entries are duplicates exception is raised", + summary="Register new molecules", + description=( + "Register one or more molecules in the database. Accepts SMILES strings " + "(one per line) or an SDF block as plain text. Returns the new registry " + "numbers (molregnos) for successfully registered molecules.\n\n" + "Duplicate molecules are flagged as `DUPLICATE` and parse failures as " + "`PARSE_FAILURE` in the response array." 
+ ), + response_description="Array of registry numbers (integers) or status strings (DUPLICATE, PARSE_FAILURE)", status_code=status.HTTP_200_OK, response_model=List[Union[str, int]], + responses={ + 200: { + "description": "Molecules registered successfully", + "content": { + "application/json": { + "example": [1, "DUPLICATE", 3], + } + }, + }, + 422: {"description": "Registration failed — all entries are duplicates or unparseable"}, + }, ) async def register_compounds( - data: Annotated[str, Body(embed=False, media_type="text/plain")] = "CCCC" + data: Annotated[ + str, + Body( + embed=False, + media_type="text/plain", + description=( + "Molecular data as plain text. Provide either SMILES strings " + "(one per line) or an SDF block (containing $$$$ delimiters)." + ), + openapi_examples={ + "smiles": { + "summary": "SMILES input", + "description": "One or more SMILES strings, one per line", + "value": "CCCC\nCCCCO\nc1ccccc1", + }, + "sdf": { + "summary": "SDF block", + "description": "An SDF block with $$$$ delimiters", + "value": ( + "\n Mrv2311 08092305412D\n\n" + " 3 2 0 0 0 0 999 V2000\n" + " -0.4018 0.6926 0.0000 C 0 0 0 0 0 0\n" + " 0.3127 1.1051 0.0000 C 0 0 0 0 0 0\n" + " 1.0272 0.6926 0.0000 O 0 0 0 0 0 0\n" + " 1 2 1 0 0 0 0\n" + " 2 3 1 0 0 0 0\n" + "M END\n$$$$" + ), + }, + }, + ), + ] = "CCCC", ): """ - ## Registers new molecules, assuming it doesn't already exist, - and returns the new registry number(s) (molregno). If all entries - are duplicates exception is raised + ## Register new molecules + + Registers one or more molecules (assuming they don't already exist) and returns + the new registry number(s) (molregno). 
- #### Only one of the molecule format objects should be provided + ### Input Formats + - **SMILES** — one SMILES string per line + - **SDF block** — MOL/SDF format with `$$$$` delimiters - molblock -- MOL or SDF block - smiles -- smiles + ### Response + An array where each element is either: + - An **integer** — the new molregno for a successfully registered molecule + - `"DUPLICATE"` — the molecule already exists in the database + - `"PARSE_FAILURE"` — the molecule could not be parsed """ try: if "$$$$" in data: @@ -120,17 +200,45 @@ async def register_compounds( @router.get( "/query", tags=["registration"], - summary="Queries to see if a molecule has already been registered", + summary="Query if a molecule is already registered", + description=( + "Check whether a molecule (given as a SMILES string) has already been " + "registered in the database. Returns the corresponding registry numbers " + "(molregnos) if found." + ), response_model=List[int], - response_description="Returns the corresponding registry numbers (molregnos)", + response_description="Array of matching registry numbers (molregnos)", status_code=status.HTTP_200_OK, + responses={ + 200: { + "description": "Query completed successfully", + "content": { + "application/json": { + "example": [42, 108], + } + }, + }, + 500: {"description": "Internal server error during query"}, + }, ) -async def query_compounds(smi: str): +async def query_compounds( + smi: str = Query( + ..., + description="SMILES string of the molecule to query", + examples=["CCCC", "c1ccccc1", "CCO"], + ), +): """ - ## Queries to see if a molecule has already been registered + ## Query if a molecule is already registered - Returns: - Corresponding registry numbers (molregnos) + Checks the registration database for the given molecule. + + ### Parameters + - **smi**: A valid SMILES string + + ### Returns + An array of integer registry numbers (molregnos) matching the query. + Returns an empty array if the molecule is not registered. 
""" try: res = query(smiles=smi, config=LWREG_CONFIG) @@ -142,18 +250,49 @@ async def query_compounds(smi: str): @router.post( "/retrieve", tags=["registration"], - summary="Retrieves entries based on the list of IDs provided", + summary="Retrieve registered molecules by ID", + description=( + "Retrieve one or more registered molecules by their registry IDs (molregnos). " + "Returns the molecular data and format for each requested ID." + ), response_model=tuple(), - response_description="Returns HTTP Status Code 200 (OK)", + response_description="Array of (molregno, data, format) tuples for each requested ID", status_code=status.HTTP_200_OK, + responses={ + 200: { + "description": "Successfully retrieved molecule data", + "content": { + "application/json": { + "example": [ + [1, "CCCC", "smiles"], + [2, "CCO", "smiles"], + ], + } + }, + }, + 500: {"description": "Internal server error during retrieval"}, + }, ) -async def retrieve_compounds(ids: List[int]): +async def retrieve_compounds( + ids: List[int] = Body( + ..., + description="List of registry IDs (molregnos) to retrieve", + examples=[[1, 2, 3]], + ), +): """ - ## Retrieves entries based on the ids provided + ## Retrieve registered molecules by ID - Returns: - Molecule data for one or more registry ids (molregnos). - The return value is a tuple of (molregno, data, format) 3-tuples + Fetches molecule data for one or more registry IDs (molregnos). + + ### Request Body + A JSON array of integer registry IDs. + + ### Returns + An array of `[molregno, data, format]` tuples containing: + - **molregno** — the registry number + - **data** — the molecular data (SMILES, MOL block, etc.) 
+ - **format** — the format of the data """ try: res = retrieve(ids=ids, config=LWREG_CONFIG) diff --git a/app/routers/spectra.py b/app/routers/spectra.py index ea2c05d..501f464 100644 --- a/app/routers/spectra.py +++ b/app/routers/spectra.py @@ -1,6 +1,9 @@ -from fastapi import APIRouter, HTTPException, status, UploadFile, File, Form +from fastapi import APIRouter, Body, HTTPException, status, UploadFile, File, Form +from fastapi.responses import StreamingResponse +import io from app.schemas import HealthCheck from pydantic import BaseModel, HttpUrl, Field +from typing import Optional import subprocess import tempfile import os @@ -11,7 +14,12 @@ prefix="/spectra", tags=["spectra"], dependencies=[], - responses={404: {"description": "Not Found"}}, + responses={ + 404: {"description": "Not Found"}, + 408: {"description": "Processing timeout exceeded"}, + 422: {"description": "Error parsing the spectra input"}, + 500: {"description": "Docker or nmr-converter container not available"}, + }, ) # Container name for nmr-cli (from docker-compose.yml) @@ -19,20 +27,42 @@ class UrlParseRequest(BaseModel): - """Request model for parsing spectra from URL""" - url: HttpUrl = Field(..., description="URL of the spectra file") + """Request model for parsing spectra from a remote URL.""" + + url: HttpUrl = Field( + ..., + description="URL pointing to the NMR spectra file to parse", + json_schema_extra={ + "examples": ["https://example.com/spectra/sample.jdx"], + }, + ) capture_snapshot: bool = Field( False, - description="Generate an image snapshot of the spectra" + description="Generate an image snapshot of the spectra", ) auto_processing: bool = Field( False, - description="Enable automatic processing of spectrum (FID → FT spectra)" + description="Enable automatic processing of spectrum (FID → FT spectra)", ) auto_detection: bool = Field( False, - description="Enable ranges and zones automatic detection" + description="Enable ranges and zones automatic detection", ) + raw_data: 
bool = Field( + False, description="Include raw data in the output (default: data source)") + + model_config = { + "json_schema_extra": { + "examples": [ + { + "url": "https://example.com/spectra/sample.jdx", + "capture_snapshot": False, + "auto_processing": False, + "auto_detection": False, + } + ] + } + } @router.get("/", include_in_schema=False) @@ -64,8 +94,9 @@ def run_command( capture_snapshot: bool = False, auto_processing: bool = False, auto_detection: bool = False, -) -> dict: - """Execute nmr-cli command in Docker container""" + raw_data: bool = False, +) -> StreamingResponse: + """Execute nmr-cli parse-spectra command in Docker container.""" cmd = ["nmr-cli", "parse-spectra"] @@ -80,6 +111,39 @@ def run_command( cmd.append("-p") if auto_detection: cmd.append("-d") + if raw_data: + cmd.append("-r") + + try: + result = subprocess.run( + ["docker", "exec", NMR_CLI_CONTAINER] + cmd, + capture_output=True, + timeout=120, + ) + except subprocess.TimeoutExpired: + raise HTTPException( + status_code=408, detail="Processing timeout exceeded") + except FileNotFoundError: + raise HTTPException( + status_code=500, detail="Docker not found or nmr-converter container not running.") + + if result.returncode != 0: + raise HTTPException( + status_code=422, + detail=f"NMR CLI error: {result.stderr.decode('utf-8') or 'Unknown error'}", + ) + + return StreamingResponse( + io.BytesIO(result.stdout), + media_type="application/json", + headers={"Content-Disposition": "attachment; filename=parse-output.json"}, + ) + + +def run_publication_string_command(publication_string: str) -> dict: + """Execute nmr-cli parse-publication-string command in Docker container.""" + + cmd = ["nmr-cli", "parse-publication-string", publication_string] try: result = subprocess.run( @@ -107,15 +171,25 @@ def run_command( detail=f"NMR CLI error: {error_msg}" ) - # Parse output + stdout = result.stdout.decode("utf-8").strip() + + if not stdout: + raise HTTPException( + status_code=422, + detail="NMR 
CLI returned empty output. The publication string may be invalid or unrecognized." + ) + + # Validate that stdout is valid JSON without fully deserializing try: - return json.loads(result.stdout.decode("utf-8")) + json.loads(stdout) except json.JSONDecodeError as e: raise HTTPException( status_code=500, detail=f"Invalid JSON from NMR CLI: {e}" ) + return stdout + def copy_file_to_container(local_path: str, container_path: str) -> None: """Copy a file to the nmr-converter container.""" @@ -147,40 +221,161 @@ def remove_file_from_container(container_path: str) -> None: pass +class PeakItem(BaseModel): + """A single NMR peak.""" + x: float = Field(..., description="Chemical shift in ppm") + y: Optional[float] = Field( + 1.0, description="Peak intensity (default: 1.0)") + width: Optional[float] = Field( + 1.0, description="Peak width in Hz (default: 1.0)") + + +class PeaksToNMRiumOptions(BaseModel): + """Options for peaks-to-NMRium conversion.""" + nucleus: Optional[str] = Field( + "1H", description="Nucleus type (e.g. 
'1H', '13C')") + solvent: Optional[str] = Field("", description="NMR solvent") + frequency: Optional[float] = Field(400, description="NMR frequency in MHz") + nbPoints: Optional[int] = Field( + 131072, description="Number of points for spectrum generation", alias="nb_points") + + model_config = {"populate_by_name": True} + + +class PeaksToNMRiumRequest(BaseModel): + """Request model for converting peaks to NMRium object.""" + peaks: list[PeakItem] = Field( + ..., + min_length=1, + description="List of NMR peaks with chemical shift (x), intensity (y), and width", + ) + options: Optional[PeaksToNMRiumOptions] = Field( + None, + description="Spectrum generation options", + ) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "peaks": [ + {"x": 7.26, "y": 1, "width": 1}, + {"x": 2.10, "y": 1, "width": 1}, + ], + "options": { + "nucleus": "1H", + "frequency": 400, + }, + } + ] + } + } + + +def run_peaks_to_nmrium_command(payload: dict) -> str: + """Execute nmr-cli peaks-to-nmrium command in Docker container via stdin.""" + + cmd = ["docker", "exec", "-i", NMR_CLI_CONTAINER, + "nmr-cli", "peaks-to-nmrium"] + stdin_data = json.dumps(payload) + + try: + result = subprocess.run( + cmd, + input=stdin_data.encode("utf-8"), + capture_output=True, + timeout=120, + ) + except subprocess.TimeoutExpired: + raise HTTPException( + status_code=408, + detail="Processing timeout exceeded", + ) + except FileNotFoundError: + raise HTTPException( + status_code=500, + detail="Docker not found or nmr-converter container not running.", + ) + + if result.returncode != 0: + error_msg = result.stderr.decode( + "utf-8") if result.stderr else "Unknown error" + raise HTTPException( + status_code=422, + detail=f"NMR CLI error: {error_msg}", + ) + + stdout = result.stdout.decode("utf-8").strip() + + if not stdout: + raise HTTPException( + status_code=422, + detail="NMR CLI returned empty output. 
The peak list may be invalid.", + ) + + try: + json.loads(stdout) + except json.JSONDecodeError as e: + raise HTTPException( + status_code=500, + detail=f"Invalid JSON from NMR CLI: {e}", + ) + + return stdout + + @router.post( "/parse/file", tags=["spectra"], - summary="Parse spectra from uploaded file", - response_description="Spectra data in JSON format", + summary="Parse spectra from an uploaded file", + description=( + "Upload an NMR spectra file (JCAMP-DX, Bruker, etc.) and parse it into " + "structured JSON. The file is processed by the **nmr-cli** tool running " + "inside a Docker container.\n\n" + "Supported file formats include JCAMP-DX (`.jdx`, `.dx`), Bruker directories " + "(zipped), and other formats supported by nmr-cli." + ), + response_description="Parsed spectra data in NMRium-compatible JSON format", status_code=status.HTTP_200_OK, + responses={ + 200: {"description": "Successfully parsed the spectra file"}, + 408: {"description": "Processing timeout exceeded (120s limit)"}, + 422: {"description": "Error parsing the spectra file"}, + 500: {"description": "Docker or nmr-converter container not available"}, + }, ) async def parse_spectra_from_file( - file: UploadFile = File(..., description="Upload a spectra file"), + file: UploadFile = File( + ..., description="NMR spectra file to parse (JCAMP-DX, Bruker zip, etc.)"), capture_snapshot: bool = Form( False, - description="Generate an image snapshot of the spectra" + description="Generate an image snapshot of the spectra", ), auto_processing: bool = Form( False, - description="Enable automatic processing of spectrum (FID → FT spectra)" + description="Enable automatic processing of spectrum (FID → FT spectra)", ), auto_detection: bool = Form( False, - description="Enable ranges and zones automatic detection" + description="Enable ranges and zones automatic detection", ), + raw_data: bool = Form( + False, description="Include raw data in the output (default: data source references)") ): """ - ## Parse 
spectra from uploaded file - - Upload a spectra file along with processing options using multipart/form-data. - - Processing Options: - - `capture_snapshot (s)` : Capture snapshot of the spectra - - `auto_processing (p)` : Enable automatic processing of spectrum (FID → FT spectra) - - `auto_detection (d)` : Enable ranges and zones automatic detection - - Returns: - Spectra data in JSON format + ## Parse spectra from an uploaded file + + Upload an NMR spectra file along with processing options using `multipart/form-data`. + + ### Processing Options + | Option | Description | + |--------|-------------| + | `capture_snapshot` | Capture an image snapshot of the spectra | + | `auto_processing` | Automatically process FID → FT spectra | + | `auto_detection` | Automatically detect ranges and zones | + | `raw_data` | Include raw data in the output (default: data source) | + ### Returns + Parsed spectra data in NMRium-compatible JSON format. """ local_tmp_path = None @@ -207,6 +402,7 @@ async def parse_spectra_from_file( capture_snapshot=capture_snapshot, auto_processing=auto_processing, auto_detection=auto_detection, + raw_data=raw_data, ) except HTTPException: @@ -227,23 +423,37 @@ async def parse_spectra_from_file( @router.post( "/parse/url", tags=["spectra"], - summary="Parse spectra from URL", - response_description="Spectra data in JSON format", + summary="Parse spectra from a remote URL", + description=( + "Provide a URL pointing to an NMR spectra file and parse it into structured " + "JSON. The file is fetched and processed by the **nmr-cli** tool running " + "inside a Docker container." 
+ ), + response_description="Parsed spectra data in NMRium-compatible JSON format", status_code=status.HTTP_200_OK, + responses={ + 200: {"description": "Successfully parsed the spectra from URL"}, + 408: {"description": "Processing timeout exceeded (120s limit)"}, + 422: {"description": "Error parsing spectra from the provided URL"}, + 500: {"description": "Docker or nmr-converter container not available"}, + }, ) async def parse_spectra_from_url(request: UrlParseRequest): """ - Parse spectra from URL + ## Parse spectra from a remote URL - Provide a URL to a spectra file along with processing options using JSON body. + Provide a URL to an NMR spectra file along with processing options in the JSON body. - Processing Options: - - `capture_snapshot (s)` : Capture snapshot of the spectra - - `auto_processing (p)` : Enable automatic processing of spectrum (FID → FT spectra) - - `auto_detection (d)` : Enable ranges and zones automatic detection + ### Processing Options + | Option | Description | + |--------|-------------| + | `capture_snapshot` | Capture an image snapshot of the spectra | + | `auto_processing` | Automatically process FID → FT spectra | + | `auto_detection` | Automatically detect ranges and zones | + | `raw_data` | Include raw data in the output (default: data source) | - Returns: - Spectra data in JSON format + ### Returns + Parsed spectra data in NMRium-compatible JSON format. 
""" try: return run_command( @@ -251,6 +461,7 @@ async def parse_spectra_from_url(request: UrlParseRequest): capture_snapshot=request.capture_snapshot, auto_processing=request.auto_processing, auto_detection=request.auto_detection, + raw_data=request.raw_data, ) except HTTPException: @@ -260,3 +471,178 @@ async def parse_spectra_from_url(request: UrlParseRequest): status_code=422, detail=f"Error parsing spectra from URL: {e}" ) + + +@router.post( + "/parse/publication-string", + tags=["spectra"], + summary="Resurrect NMR spectrum from an ACS publication string", + description=( + "Parse an ACS-style NMR publication string and resurrect it into a full " + "NMRium-compatible spectrum. The publication string is processed by the " + "**nmr-cli** `parse-publication-string` command running inside a Docker " + "container.\n\n" + "The string is parsed to extract nucleus, solvent, and chemical shift ranges, " + "which are then used to reconstruct the spectrum data (x/y arrays) at 400 MHz " + "with 131072 points.\n\n" + "### Example publication strings\n" + "- `1H NMR (400 MHz, CDCl3) δ 7.26 (s, 1H), 2.10 (s, 3H)`\n" + "- `13C NMR (101 MHz, DMSO-d6) δ 170.1, 136.5, 128.7`" + ), + response_description="Resurrected spectrum in NMRium-compatible JSON format", + status_code=status.HTTP_200_OK, + responses={ + 200: {"description": "Successfully resurrected spectrum from publication string"}, + 408: {"description": "Processing timeout exceeded (120s limit)"}, + 422: {"description": "Invalid publication string or NMR CLI error"}, + 500: {"description": "Docker or nmr-converter container not available"}, + }, +) +async def parse_publication_string( + publication_string: str = Body( + ..., + media_type="text/plain", + openapi_examples={ + "1H proton": { + "summary": "1H NMR example", + "value": "1H NMR (400 MHz, CDCl3) δ 7.26 (s, 1H), 2.10 (s, 3H)", + }, + "13C carbon": { + "summary": "13C NMR example", + "value": "13C NMR (101 MHz, DMSO-d6) δ 170.1, 136.5, 128.7", + }, + }, + ), +): 
+ """ + ## Resurrect NMR spectrum from a publication string + + Send the ACS-style NMR publication string directly as the request body + (plain text, no JSON wrapping). + + ### Example request body + ``` + 1H NMR (400 MHz, CDCl3) δ 7.26 (s, 1H), 2.10 (s, 3H) + ``` + + ### Returns + NMRium-compatible JSON with spectrum data, ranges, and metadata. + """ + if not publication_string or not publication_string.strip(): + raise HTTPException( + status_code=422, + detail="Publication string cannot be empty." + ) + + try: + raw_json = run_publication_string_command(publication_string.strip()) + return StreamingResponse( + io.BytesIO(raw_json.encode("utf-8")), + media_type="application/json", + headers={ + "Content-Disposition": "attachment; filename=nmrium-spectrum.json", + }, + ) + + except HTTPException: + raise + except Exception as e: + raise HTTPException( + status_code=422, + detail=f"Error parsing publication string: {e}" + ) + + +@router.post( + "/parse/peaks", + tags=["spectra"], + summary="Convert a peak list to an NMRium-compatible spectrum", + description=( + "Convert a list of NMR peaks (chemical shifts with optional intensity and " + "width) into a full NMRium-compatible spectrum object. 
Each peak is defined " + "by its chemical shift position (`x` in ppm), intensity (`y`), and width " + "(in Hz).\n\n" + "The peaks are used to generate a simulated 1D spectrum using the " + "**nmr-cli** `peaks-to-nmrium` command running inside a Docker container.\n\n" + "### Example input\n" + "```json\n" + "{\n" + ' "peaks": [\n' + ' {"x": 7.26, "y": 1, "width": 1},\n' + ' {"x": 2.10, "y": 1, "width": 1}\n' + " ],\n" + ' "options": {\n' + ' "nucleus": "1H",\n' + ' "frequency": 400\n' + " }\n" + "}\n" + "```" + ), + response_description="Generated spectrum in NMRium-compatible JSON format", + status_code=status.HTTP_200_OK, + responses={ + 200: {"description": "Successfully generated spectrum from peak list"}, + 408: {"description": "Processing timeout exceeded (120s limit)"}, + 422: {"description": "Invalid peak list or NMR CLI error"}, + 500: {"description": "Docker or nmr-converter container not available"}, + }, +) +async def parse_peaks(request: PeaksToNMRiumRequest): + """ + ## Convert a peak list to NMRium spectrum + + Provide a list of NMR peaks and optional generation parameters to produce + an NMRium-compatible spectrum object. + + ### Peak fields + | Field | Type | Required | Description | + |---------|-------|----------|--------------------------------| + | `x` | float | Yes | Chemical shift in ppm | + | `y` | float | No | Peak intensity (default: 1.0) | + | `width` | float | No | Peak width in Hz (default: 1.0)| + + ### Options + | Option | Type | Default | Description | + |-------------|--------|---------|----------------------------| + | `nucleus` | string | `1H` | Nucleus type | + | `solvent` | string | `""` | NMR solvent | + | `frequency` | float | `400` | NMR frequency in MHz | + | `nb_points` | int | `131072`| Number of spectrum points | + + ### Returns + NMRium-compatible JSON with spectrum data and metadata. 
+ """ + if not request.peaks: + raise HTTPException( + status_code=422, + detail="Peaks list cannot be empty.", + ) + + payload = { + "peaks": [peak.model_dump() for peak in request.peaks], + } + if request.options: + payload["options"] = { + "nucleus": request.options.nucleus, + "solvent": request.options.solvent, + "frequency": request.options.frequency, + "nbPoints": request.options.nbPoints, + } + + try: + raw_json = run_peaks_to_nmrium_command(payload) + return StreamingResponse( + io.BytesIO(raw_json.encode("utf-8")), + media_type="application/json", + headers={ + "Content-Disposition": "attachment; filename=nmrium-peaks.json", + }, + ) + + except HTTPException: + raise + except Exception as e: + raise HTTPException( + status_code=422, + detail=f"Error converting peaks to NMRium: {e}", + ) diff --git a/app/scripts/nmr-cli/Dockerfile b/app/scripts/nmr-cli/Dockerfile index 1887a51..dc94d69 100644 --- a/app/scripts/nmr-cli/Dockerfile +++ b/app/scripts/nmr-cli/Dockerfile @@ -1,10 +1,12 @@ # build the image ` docker build --tag nmr-cli . ` # run the container ` docker run -it nmr-cli bash ` -FROM mcr.microsoft.com/playwright:v1.56.1-noble +FROM mcr.microsoft.com/playwright:v1.58.2-noble SHELL ["/bin/bash", "-o", "pipefail", "-c"] +# Downgrade to Node 22 +RUN npm install -g n && n 22 && hash -r WORKDIR /app @@ -26,10 +28,9 @@ COPY . 
./ RUN npm run build + # Install the nmr-cli as a global package +# For example, nmr-cli parse-spectra -u https://s3.uni-jena.de/nmrxiv/production/archive/d5ec45db-d984-46e9-bb72-c43fd4a463fa/Sali_Eth.zip -s # For example, nmr-cli parse-spectra -u https://cheminfo.github.io/bruker-data-test/data/zipped/aspirin-1h.zip # nmr-cli predict -n "1H" --id 1 --type "nmr;1H;1d" --shifts "1" --solvent "Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)" -m $"\n Ketcher 6122516162D 1 1.00000 0.00000 0\n\n 16 17 0 0 0 0 0 0 0 0999 V2000\n 1.1954 -4.6484 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.9258 -4.6479 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.0622 -4.1483 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.9258 -5.6488 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.1954 -5.6533 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.0644 -6.1483 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.7902 -4.1495 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6574 -4.6498 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3.7964 -6.1512 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 4.6596 -5.6458 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.5228 -4.1488 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 5.5277 -6.1421 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 6.3895 -4.6477 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 5.5216 -3.1488 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 7.2548 -4.1466 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 8.1215 -4.6455 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 3 1 2 0 0 0 0\n 1 5 1 0 0 0 0\n 5 6 2 0 0 0 0\n 6 4 1 0 0 0 0\n 4 2 1 0 0 0 0\n 2 3 1 0 0 0 0\n 4 9 1 0 0 0 0\n 9 10 2 0 0 0 0\n 10 8 1 0 0 0 0\n 8 7 2 0 0 0 0\n 7 2 1 0 0 0 0\n 8 11 1 0 0 0 0\n 10 12 1 0 0 0 0\n 11 13 1 0 0 0 0\n 11 14 2 0 0 0 0\n 13 15 1 0 0 0 0\n 15 16 1 0 0 0 0\nM END" RUN npm install . 
-g - - - diff --git a/app/scripts/nmr-cli/package-lock.json b/app/scripts/nmr-cli/package-lock.json index 9e8e05d..188c9f4 100644 --- a/app/scripts/nmr-cli/package-lock.json +++ b/app/scripts/nmr-cli/package-lock.json @@ -9,16 +9,19 @@ "version": "1.0.0", "license": "ISC", "dependencies": { - "@zakodium/nmr-types": "^0.5.0", - "@zakodium/nmrium-core": "^0.5.6", - "@zakodium/nmrium-core-plugins": "^0.6.25", - "axios": "^1.13.2", - "file-collection": "^6.5.0", + "@zakodium/nmr-types": "^0.5.2", + "@zakodium/nmrium-core": "^0.6.5", + "@zakodium/nmrium-core-plugins": "^0.6.39", + "axios": "^1.13.5", + "fifo-logger": "^2.0.1", + "file-collection": "^6.6.0", + "json-stream-stringify": "^3.1.6", "lodash.merge": "^4.6.2", "mf-parser": "^3.6.0", - "ml-spectra-processing": "^14.19.0", - "nmr-processing": "^22.1.0", - "playwright": "^1.51.0", + "ml-spectra-processing": "^14.20.0", + "nmr-processing": "^22.5.0", + "openchemlib": "^9.20.0", + "playwright": "1.58.2", "yargs": "^18.0.0" }, "bin": { @@ -26,7 +29,7 @@ }, "devDependencies": { "@types/lodash.merge": "^4.6.9", - "@types/node": "^25.0.8", + "@types/node": "^25.3.0", "@types/yargs": "^17.0.35", "ts-node": "^10.9.2", "typescript": "^5.9.3" @@ -125,14 +128,14 @@ } }, "node_modules/@types/node": { - "version": "25.0.8", - "resolved": "https://registry.npmjs.org/@types/node/-/node-25.0.8.tgz", - "integrity": "sha512-powIePYMmC3ibL0UJ2i2s0WIbq6cg6UyVFQxSCpaPxxzAaziRfimGivjdF943sSGV6RADVbk0Nvlm5P/FB44Zg==", + "version": "25.3.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.0.tgz", + "integrity": "sha512-4K3bqJpXpqfg2XKGK9bpDTc6xO/xoUP/RBWS7AtRMug6zZFaRekiLzjVtAoZMquxoAbzBvy5nxQ7veS5eYzf8A==", "dev": true, "license": "MIT", "peer": true, "dependencies": { - "undici-types": "~7.16.0" + "undici-types": "~7.18.0" } }, "node_modules/@types/yargs": { @@ -153,59 +156,59 @@ "license": "MIT" }, "node_modules/@zakodium/nmr-types": { - "version": "0.5.0", - "resolved": 
"https://registry.npmjs.org/@zakodium/nmr-types/-/nmr-types-0.5.0.tgz", - "integrity": "sha512-GjxJ8JQJ3DOzKY/ucxvCJ0X3nlGl8Qk9J6Kng4XTm+rZSKgwj2Xu98IDSZvpJKxr9v0bS0reXfmvNRvSe+fPgg==", + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/@zakodium/nmr-types/-/nmr-types-0.5.2.tgz", + "integrity": "sha512-tT7mezDYiiIFrgE1xunmobO9vbMOCMRHtJ3Ei3ArHHq49oGBPoe7a5rWWK6mX4YCUsahhgO503HKwufwge8pLg==", "license": "CC-BY-NC-SA-4.0", "dependencies": { "ml-peak-shape-generator": "^4.2.0", - "ml-signal-processing": "^2.1.0", - "ml-spectra-processing": "^14.18.1" + "ml-signal-processing": "^2.2.0", + "ml-spectra-processing": "^14.19.0" } }, "node_modules/@zakodium/nmrium-core": { - "version": "0.5.6", - "resolved": "https://registry.npmjs.org/@zakodium/nmrium-core/-/nmrium-core-0.5.6.tgz", - "integrity": "sha512-upeFb+KZECOIik3wxra9tjrbjiZvivIR4UDdQF3REibzyqB8zxsZrT4WMpdRkphqTvj51OK+yQgyc1R1TNqeLQ==", + "version": "0.6.5", + "resolved": "https://registry.npmjs.org/@zakodium/nmrium-core/-/nmrium-core-0.6.5.tgz", + "integrity": "sha512-xSlz1uL1xFZQkFyIqd3c8dffDfixHFrDU7MuZzDJcomC1lu8/uFqnveqq0I+XFhRrFPlf48gXTF9AuOb9GLA/g==", "license": "CC-BY-NC-SA-4.0", "dependencies": { - "cheminfo-types": "^1.8.1", + "cheminfo-types": "^1.10.0", "fifo-logger": "^2.0.1", - "file-collection": "^6.5.0", + "file-collection": "^6.6.0", "is-any-array": "^2.0.1", "lodash.merge": "^4.6.2", - "ml-spectra-processing": "^14.18.2", + "ml-spectra-processing": "^14.19.0", "nmr-correlation": "^2.3.5" } }, "node_modules/@zakodium/nmrium-core-plugins": { - "version": "0.6.25", - "resolved": "https://registry.npmjs.org/@zakodium/nmrium-core-plugins/-/nmrium-core-plugins-0.6.25.tgz", - "integrity": "sha512-11BfZNDKj1ei0q4TruBvSfE8VNoLEddm0WYHnOjY9yDDHHSmj0dqECvAnCRCnRbm0d/9IPx130bY4PQGHcMtQA==", + "version": "0.6.39", + "resolved": "https://registry.npmjs.org/@zakodium/nmrium-core-plugins/-/nmrium-core-plugins-0.6.39.tgz", + "integrity": 
"sha512-1kgPHIpP7vDXGgG17w9NkeGx0TQJNTXokzKYw/CIh5SqYNuPSZut4r28gWcqP3QBZzlnK+fXUFL2eSjZm35VFg==", "license": "CC-BY-NC-SA-4.0", "dependencies": { "@date-fns/utc": "^2.1.1", - "@zakodium/nmrium-core": "^0.5.6", - "cheminfo-types": "^1.8.1", + "@zakodium/nmrium-core": "^0.6.5", + "cheminfo-types": "^1.10.0", "convert-to-jcamp": "^6.0.0", "date-fns": "^4.1.0", - "file-collection": "^6.5.0", + "file-collection": "^6.6.0", "gyromagnetic-ratio": "^2.0.0", "is-any-array": "^2.0.1", - "jcampconverter": "^12.1.0", + "jcampconverter": "^12.2.0", "linear-sum-assignment": "^1.0.9", "lodash.merge": "^4.6.2", - "ml-spectra-processing": "^14.18.2", - "nmr-processing": "^22.1.0", - "openchemlib": "^9.18.2", - "openchemlib-utils": "^8.8.1", + "ml-spectra-processing": "^14.19.0", + "nmr-processing": "^22.5.0", + "openchemlib": "^9.20.0", + "openchemlib-utils": "^8.12.1", "sdf-parser": "^7.0.4" } }, "node_modules/@zip.js/zip.js": { - "version": "2.8.15", - "resolved": "https://registry.npmjs.org/@zip.js/zip.js/-/zip.js-2.8.15.tgz", - "integrity": "sha512-HZKJLFe4eGVgCe9J87PnijY7T1Zn638bEHS+Fm/ygHZozRpefzWcOYfPaP52S8pqk9g4xN3+LzMDl3Lv9dLglA==", + "version": "2.8.21", + "resolved": "https://registry.npmjs.org/@zip.js/zip.js/-/zip.js-2.8.21.tgz", + "integrity": "sha512-fkyzXISE3IMrstDO1AgPkJCx14MYHP/suIGiAovEYEuBjq3mffsuL6aMV7ohOSjW4rXtuACuUfpA3GtITgdtYg==", "license": "BSD-3-Clause", "engines": { "bun": ">=0.7.0", @@ -283,13 +286,13 @@ "license": "MIT" }, "node_modules/axios": { - "version": "1.13.2", - "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.2.tgz", - "integrity": "sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==", + "version": "1.13.5", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.5.tgz", + "integrity": "sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==", "license": "MIT", "dependencies": { - "follow-redirects": "^1.15.6", - "form-data": "^4.0.4", + 
"follow-redirects": "^1.15.11", + "form-data": "^4.0.5", "proxy-from-env": "^1.1.0" } }, @@ -437,9 +440,9 @@ } }, "node_modules/diff": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", - "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.4.tgz", + "integrity": "sha512-X07nttJQkwkfKfvTPG/KSnE2OMdcUCao6+eXF3wmnIQRn2aPAHH3VxDbDOdegkd6JbPsXqShpvEOHfAT+nCNwQ==", "dev": true, "license": "BSD-3-Clause", "engines": { @@ -473,13 +476,12 @@ "license": "MIT" }, "node_modules/ensure-string": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/ensure-string/-/ensure-string-1.2.0.tgz", - "integrity": "sha512-Hvv2Xocfn6CSvCWgIGNzf0D5bgE6fAa65cayGOWsEXz03ej3aDpsogP3zstuLKt43zVuojQFST4XHPt9+PtuGQ==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ensure-string/-/ensure-string-2.0.0.tgz", + "integrity": "sha512-qej9cbqY36/lD8TdFFGugkTybqdxIWLXcqXPlLsuMoW4GJ8ExUGvLEXr+2sXbjssHiAO0hMpkK6tk3E8xphArQ==", "license": "MIT", "dependencies": { - "cheminfo-types": "^1.1.0", - "isutf8": "^4.0.0" + "cheminfo-types": "^1.10.0" } }, "node_modules/es-define-property": { @@ -552,9 +554,9 @@ } }, "node_modules/file-collection": { - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/file-collection/-/file-collection-6.5.0.tgz", - "integrity": "sha512-c54WgyKBiiFtEVrmmhGvHa5wY/diPr26ecFHm8T0k6YNUcnETx+Vm+LjfPa4wan0d8qT92Tmf8t8wRp9WuaZVA==", + "version": "6.6.0", + "resolved": "https://registry.npmjs.org/file-collection/-/file-collection-6.6.0.tgz", + "integrity": "sha512-QO1CaaSoqslwib8atFXiavl7OXQMf8JzlRmsDWg1rCWwdFjmzZDnpfkvrrS32XL4Q47Z8fKXOr21r2SMTt/EBQ==", "license": "MIT", "dependencies": { "@zip.js/zip.js": "^2.8.8", @@ -753,27 +755,27 @@ "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==", "license": "MIT" }, - 
"node_modules/isutf8": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/isutf8/-/isutf8-4.0.1.tgz", - "integrity": "sha512-1pk2/2pE+G48eETnp4uOLxQ9WUCxD7oVauYwhFEAGREJPDxEO7iX9qstylrCcx3lNWa1RCS2DxGTxrHdWqS7/w==", - "license": "MIT", - "engines": { - "node": ">= 12" - } - }, "node_modules/jcampconverter": { - "version": "12.1.0", - "resolved": "https://registry.npmjs.org/jcampconverter/-/jcampconverter-12.1.0.tgz", - "integrity": "sha512-LbI5kRC3MW94I8DkMdp8D4D8tm/cDHfi5i3h0qwh6SxmslOblnIM/JAcvoArE6sh6XvBg7RlYAxOnmZOPqBf/Q==", + "version": "12.2.0", + "resolved": "https://registry.npmjs.org/jcampconverter/-/jcampconverter-12.2.0.tgz", + "integrity": "sha512-gHEIRlIdz1n8NujQbyv4U8iCfS5go2gCRk+Z00fbtq1NMoPlwMmrUfW+VhE9rezqSlACe7roqMNIZOKV0JtBow==", "license": "CC-BY-NC-SA-4.0", "dependencies": { - "cheminfo-types": "^1.8.1", + "cheminfo-types": "^1.10.0", "dynamic-typing": "^1.0.1", - "ensure-string": "^1.2.0", + "ensure-string": "^2.0.0", "gyromagnetic-ratio": "^2.0.0", "ml-array-median": "^1.1.6", - "openchemlib": "^9.18.2" + "openchemlib": "^9.20.0" + } + }, + "node_modules/json-stream-stringify": { + "version": "3.1.6", + "resolved": "https://registry.npmjs.org/json-stream-stringify/-/json-stream-stringify-3.1.6.tgz", + "integrity": "sha512-x7fpwxOkbhFCaJDJ8vb1fBY3DdSa4AlITaz+HHILQJzdPMnHEFjxPwVUi1ALIbcIxDE0PNe/0i7frnY8QnBQog==", + "license": "MIT", + "engines": { + "node": ">=7.10.1" } }, "node_modules/linear-sum-assignment": { @@ -1152,9 +1154,9 @@ } }, "node_modules/ml-spectra-processing": { - "version": "14.19.0", - "resolved": "https://registry.npmjs.org/ml-spectra-processing/-/ml-spectra-processing-14.19.0.tgz", - "integrity": "sha512-PbvRh/cFneCDY629qpURsaq58oz0pF+TM8w5lLtvAVfZmA6QOHEf5eh93n73HXS7HgkAzLJNXnKefCW8kSBmKQ==", + "version": "14.20.0", + "resolved": "https://registry.npmjs.org/ml-spectra-processing/-/ml-spectra-processing-14.20.0.tgz", + "integrity": 
"sha512-wuhhxYNyNfIzD1QkMtE8mH0zcM8dn/ZrvAyZWVjKmdQxAl8wJMaBfUO+BW2RoGtX5tIqi3moh1ukat4ft10bAw==", "license": "MIT", "dependencies": { "binary-search": "^1.3.6", @@ -1207,9 +1209,9 @@ } }, "node_modules/nmr-processing": { - "version": "22.1.0", - "resolved": "https://registry.npmjs.org/nmr-processing/-/nmr-processing-22.1.0.tgz", - "integrity": "sha512-l/IFlmY6akhsIolRMMWnG4Mpp7KMqwidZJR7NEWNkySmKYI8NhVMNBiCbc3e3QSu4yVyhl0hEU7ToHobpDgpPg==", + "version": "22.5.0", + "resolved": "https://registry.npmjs.org/nmr-processing/-/nmr-processing-22.5.0.tgz", + "integrity": "sha512-/FLaMtROD1gwf8qqOfpynYNdtwRwqjSWzVtgcJvQekmHKFoALFwuxTV3Wj+LGmjCNWtAUauw/xb6fQ0DHAT0Tg==", "license": "CC-BY-NC-SA-4.0", "dependencies": { "binary-search": "^1.3.6", @@ -1233,16 +1235,16 @@ "ml-regression-polynomial": "^3.0.2", "ml-regression-simple-linear": "^3.0.1", "ml-savitzky-golay-generalized": "^4.2.0", - "ml-signal-processing": "^2.1.0", + "ml-signal-processing": "^2.2.0", "ml-simple-clustering": "^1.0.0", "ml-sparse-matrix": "^3.1.0", - "ml-spectra-processing": "^14.18.2", + "ml-spectra-processing": "^14.19.0", "ml-tree-set": "^1.0.1", "multiplet-analysis": "^2.1.5", "nmr-correlation": "^3.0.0", "numeral": "^2.0.6", - "openchemlib": "^9.18.2", - "openchemlib-utils": "^8.8.1", + "openchemlib": "^9.20.0", + "openchemlib-utils": "^8.12.1", "spectrum-generator": "^8.1.1" } }, @@ -1298,20 +1300,20 @@ } }, "node_modules/openchemlib": { - "version": "9.18.2", - "resolved": "https://registry.npmjs.org/openchemlib/-/openchemlib-9.18.2.tgz", - "integrity": "sha512-amgDEgH7lLOBGg3sS2XmxjY+n6zC8M+ohJqNgifKACkbjPuzmnzs85rbMHcAndMzn7e6hh7IwJ8FByWKdBhGSg==", + "version": "9.20.0", + "resolved": "https://registry.npmjs.org/openchemlib/-/openchemlib-9.20.0.tgz", + "integrity": "sha512-ttpQAiwUIjkn6SQrYc1KGy2IS5fP1OaARcafgaVjFykl4/aOqJgnxUtDoAjmFRei4WAUNVTkqVThlIskP36KxA==", "license": "BSD-3-Clause", "peer": true }, "node_modules/openchemlib-utils": { - "version": "8.10.0", - "resolved": 
"https://registry.npmjs.org/openchemlib-utils/-/openchemlib-utils-8.10.0.tgz", - "integrity": "sha512-a88tLw4rRjEdnGcruCoT7MyOviY5hD6ea/O0iw4QKdWJe/EK64dQq5/QASbdcHDLFth1MCzqJEliMIf37saYgQ==", + "version": "8.13.0", + "resolved": "https://registry.npmjs.org/openchemlib-utils/-/openchemlib-utils-8.13.0.tgz", + "integrity": "sha512-iFiV0M9Y2Ek83PFeraP9osN83qS5AGUKK86jCt3b8fLZFHWb6inNJW7stdhgNvF+NE/gYVol3LzuybNMH/PJCQ==", "license": "MIT", "dependencies": { "atom-sorter": "^2.2.1", - "ensure-string": "^1.2.0", + "ensure-string": "^2.0.0", "get-value": "^4.0.1", "ml-floyd-warshall": "^3.0.1", "ml-matrix": "^6.12.1", @@ -1329,12 +1331,12 @@ "license": "MIT" }, "node_modules/playwright": { - "version": "1.57.0", - "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.57.0.tgz", - "integrity": "sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==", + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz", + "integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==", "license": "Apache-2.0", "dependencies": { - "playwright-core": "1.57.0" + "playwright-core": "1.58.2" }, "bin": { "playwright": "cli.js" @@ -1347,9 +1349,9 @@ } }, "node_modules/playwright-core": { - "version": "1.57.0", - "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.57.0.tgz", - "integrity": "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==", + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.2.tgz", + "integrity": "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==", "license": "Apache-2.0", "bin": { "playwright-core": "cli.js" @@ -1365,13 +1367,13 @@ "license": "MIT" }, "node_modules/sdf-parser": { - "version": "7.0.4", - "resolved": 
"https://registry.npmjs.org/sdf-parser/-/sdf-parser-7.0.4.tgz", - "integrity": "sha512-j4VIBVTVJqs95Asz0lcIku1VjlVgKKWlwDV2JmCIq1exlI9WOSBT9rQvwvTPwl0v2TTgdQLfhYnBmiRg318LoQ==", + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/sdf-parser/-/sdf-parser-7.0.5.tgz", + "integrity": "sha512-QCKaHj5yfWuyAuCi7umH49Ec6VWNR1HSesIHOn0Q1A06PeYbPIV4GxyFyGEUfmERwJkOpDPG2FShV8yn2SoGog==", "license": "MIT", "dependencies": { "dynamic-typing": "^1.0.1", - "ensure-string": "^1.2.0" + "ensure-string": "^2.0.0" } }, "node_modules/spectrum-generator": { @@ -1482,9 +1484,9 @@ "license": "MIT" }, "node_modules/undici-types": { - "version": "7.16.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", - "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", + "version": "7.18.2", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", + "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", "dev": true, "license": "MIT" }, diff --git a/app/scripts/nmr-cli/package.json b/app/scripts/nmr-cli/package.json index c511322..c1cda08 100644 --- a/app/scripts/nmr-cli/package.json +++ b/app/scripts/nmr-cli/package.json @@ -15,21 +15,24 @@ "nmr-cli": "./build/index.js" }, "dependencies": { - "@zakodium/nmr-types": "^0.5.0", - "@zakodium/nmrium-core": "^0.5.6", - "@zakodium/nmrium-core-plugins": "^0.6.25", - "axios": "^1.13.2", - "file-collection": "^6.5.0", + "@zakodium/nmr-types": "^0.5.2", + "@zakodium/nmrium-core": "^0.6.5", + "@zakodium/nmrium-core-plugins": "^0.6.39", + "axios": "^1.13.5", + "fifo-logger": "^2.0.1", + "file-collection": "^6.6.0", + "json-stream-stringify": "^3.1.6", "lodash.merge": "^4.6.2", "mf-parser": "^3.6.0", - "ml-spectra-processing": "^14.19.0", - "nmr-processing": "^22.1.0", - "playwright": "1.56.1", + "ml-spectra-processing": "^14.20.0", + "nmr-processing": "^22.5.0", + "openchemlib": 
"^9.20.0", + "playwright": "1.58.2", "yargs": "^18.0.0" }, "devDependencies": { "@types/lodash.merge": "^4.6.9", - "@types/node": "^25.0.8", + "@types/node": "^25.3.0", "@types/yargs": "^17.0.35", "ts-node": "^10.9.2", "typescript": "^5.9.3" diff --git a/app/scripts/nmr-cli/src/index.ts b/app/scripts/nmr-cli/src/index.ts index 4368b21..74c7000 100755 --- a/app/scripts/nmr-cli/src/index.ts +++ b/app/scripts/nmr-cli/src/index.ts @@ -1,9 +1,13 @@ #!/usr/bin/env node import yargs, { type Argv, type CommandModule, type Options } from 'yargs' -import { loadSpectrumFromURL, loadSpectrumFromFilePath } from './parse/prase-spectra' +import { parseSpectra } from './parse/prase-spectra' import { generateSpectrumFromPublicationString } from './publication-string' -import { parsePredictionCommand } from './prediction/parsePredictionCommand' +import { generateNMRiumFromPeaks } from './peaks-to-nmrium' +import type { PeaksToNMRiumInput } from './peaks-to-nmrium' import { hideBin } from 'yargs/helpers' +import { parsePredictionCommand } from './prediction' +import { readFileSync } from 'fs' +import { IncludeData } from '@zakodium/nmrium-core' const usageMessage = ` Usage: nmr-cli [options] @@ -12,6 +16,7 @@ Commands: parse-spectra Parse a spectra file to NMRium file parse-publication-string resurrect spectrum from the publication string predict Predict spectrum from Mol + peaks-to-nmrium Convert a peak list to NMRium object Options for 'parse-spectra' command: -u, --url File URL @@ -19,33 +24,71 @@ Options for 'parse-spectra' command: -s, --capture-snapshot Capture snapshot -p, --auto-processing Automatic processing of spectrum (FID → FT spectra). -d, --auto-detection Enable ranges and zones automatic detection. 
- + -o, --output Output file path (optional) + -r, --raw-data Include raw data in the output instead of data source + Arguments for 'parse-publication-string' command: publicationString Publication string Options for 'predict' command: - -ps,--peakShape Peak shape algorithm (default: "lorentzian") choices: ["gaussian", "lorentzian"] - -n, --nucleus Predicted nucleus, choices: ["1H","13C"] (required) + +Common options: + -e, --engine Prediction engine (required) choices: ["nmrdb.org", "nmrshift"] + --spectra Spectra types to predict (required) choices: ["proton", "carbon", "cosy", "hsqc", "hmbc"] + -s, --structure MOL file content (structure) (required) + +nmrdb.org engine options: + --name Compound name (default: "") + --frequency NMR frequency (MHz) (default: 400) + --protonFrom Proton (1H) from in ppm (default: -1) + --protonTo Proton (1H) to in ppm (default: 12) + --carbonFrom Carbon (13C) from in ppm (default: -5) + --carbonTo Carbon (13C) to in ppm (default: 220) + --nbPoints1d 1D number of points (default: 131072) + --lineWidth 1D line width (default: 1) + --nbPoints2dX 2D spectrum X-axis points (default: 1024) + --nbPoints2dY 2D spectrum Y-axis points (default: 1024) + --autoExtendRange Auto extend range (default: true) + +nmrshift engine options: -i, --id Input ID (default: 1) - -t, --type NMR type (default: "nmr;1H;1d") - -s, --shifts Chemical shifts (default: "1") + --shifts Chemical shifts (default: "1") --solvent NMR solvent (default: "Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)") - -m, --molText MOL text (required) - --from From in (ppm) - --to To in (ppm) + choices: ["Any", "Chloroform-D1 (CDCl3)", "Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)", + "Methanol-D4 (CD3OD)", "Deuteriumoxide (D2O)", "Acetone-D6 ((CD3)2CO)", + "TETRACHLORO-METHANE (CCl4)", "Pyridin-D5 (C5D5N)", "Benzene-D6 (C6D6)", + "neat", "Tetrahydrofuran-D8 (THF-D8, C4D4O)"] + --from From in (ppm) for spectrum generation + --to To in (ppm) for spectrum generation --nbPoints Number of points 
(default: 1024) --lineWidth Line width (default: 1) --frequency NMR frequency (MHz) (default: 400) --tolerance Tolerance to group peaks with close shift (default: 0.001) + -ps,--peakShape Peak shape algorithm (default: "lorentzian") choices: ["gaussian", "lorentzian"] +Arguments for 'peaks-to-nmrium' command: + Reads JSON from stdin with the following structure: + { + "peaks": [{ "x": 7.26, "y": 1, "width": 1 }, ...], + "options": { + "nucleus": "1H", (default: "1H") + "solvent": "", (default: "") + "frequency": 400, (default: 400) + "from": -1, (optional, auto-computed from peaks) + "to": 12, (optional, auto-computed from peaks) + "nbPoints": 131072 (default: 131072) + } + } + Examples: nmr-cli parse-spectra -u file-url -s // Process spectra files from a URL and capture an image for the spectra nmr-cli parse-spectra -dir directory-path -s // process a spectra files from a directory and capture an image for the spectra nmr-cli parse-spectra -u file-url // Process spectra files from a URL nmr-cli parse-spectra -dir directory-path // Process spectra files from a directory nmr-cli parse-publication-string "your publication string" + echo '{"peaks":[{"x":7.26},{"x":2.10}]}' | nmr-cli peaks-to-nmrium // Convert peaks to NMRium object ` export interface FileOptionsArgs { @@ -79,6 +122,17 @@ export interface FileOptionsArgs { * Perform automatic ranges and zones detection. 
*/ d?: boolean; + /** + * -o, --output + * Output file path + */ + o?: string; + /** + * -r, --raw-data + * Include raw data in the output, defaults to dataSource + */ + r?: boolean; + } // Define options for parsing a spectra file @@ -110,6 +164,17 @@ const fileOptions: { [key in keyof FileOptionsArgs]: Options } = { describe: 'Ranges and zones auto detection', type: 'boolean', }, + o: { + alias: 'output', + type: 'string', + description: 'Output file path', + }, + r: { + alias: 'raw-data', + type: 'boolean', + default: false, + description: 'Include raw data in the output (default: dataSource)', + }, } as const const parseFileCommand: CommandModule<{}, FileOptionsArgs> = { @@ -121,22 +186,7 @@ const parseFileCommand: CommandModule<{}, FileOptionsArgs> = { .conflicts('u', 'dir') as Argv }, handler: argv => { - - const { u, dir } = argv; - // Handle parsing the spectra file logic based on argv options - if (u) { - loadSpectrumFromURL({ u, ...argv }).then(result => { - console.log(JSON.stringify(result)) - }) - } - - - if (dir) { - loadSpectrumFromFilePath({ dir, ...argv }).then(result => { - console.log(JSON.stringify(result)) - }) - } - + parseSpectra(argv) }, } @@ -156,11 +206,32 @@ const parsePublicationCommand: CommandModule = { }, } +// Define the peaks-to-nmrium command +const peaksToNMRiumCommand: CommandModule = { + command: ['peaks-to-nmrium', 'ptn'], + describe: 'Convert a peak list to NMRium object (reads JSON from stdin)', + handler: () => { + try { + const stdinData = readFileSync(0, 'utf-8') + const input: PeaksToNMRiumInput = JSON.parse(stdinData) + const nmriumObject = generateNMRiumFromPeaks(input) + console.log(JSON.stringify(nmriumObject)) + } catch (error) { + console.error( + 'Error:', + error instanceof Error ? 
error.message : String(error), + ) + process.exit(1) + } + }, +} + yargs(hideBin(process.argv)) .usage(usageMessage) .command(parseFileCommand) .command(parsePublicationCommand) .command(parsePredictionCommand) + .command(peaksToNMRiumCommand) .showHelpOnFail(true) .help() .parse() diff --git a/app/scripts/nmr-cli/src/parse/prase-spectra.ts b/app/scripts/nmr-cli/src/parse/prase-spectra.ts index bf84bd9..7701a06 100644 --- a/app/scripts/nmr-cli/src/parse/prase-spectra.ts +++ b/app/scripts/nmr-cli/src/parse/prase-spectra.ts @@ -10,18 +10,28 @@ import { initiateDatum1D } from './data/data1D/initiateDatum1D' import { detectZones } from './data/data2d/detectZones' import { detectRanges } from './data/data1D/detectRanges' import { Filters1DManager, Filters2DManager } from 'nmr-processing' +import yargs from 'yargs' +import { createWriteStream } from 'fs' +import { JsonStreamStringify } from 'json-stream-stringify'; +import { FifoLogger } from 'fifo-logger' type RequiredKey = Omit & Required>; +function toMessage(e: unknown): string { + return e instanceof Error ? 
e.message : String(e) +} + + const parsingOptions: ParsingOptions = { onLoadProcessing: { autoProcessing: true }, selector: { general: { dataSelection: 'preferFT' } }, experimentalFeatures: true, }; + interface Snapshot { - image: string - id: string + id: string; + image: string | null; } const core = init() @@ -33,99 +43,158 @@ function generateNMRiumURL() { return url.toString() } -async function captureSpectraViewAsBase64(nmriumState: Partial) { - const { data: { spectra } = { spectra: [] }, version } = nmriumState - const browser = await playwright.chromium.launch() - const context = await browser.newContext( - playwright.devices['Desktop Chrome HiDPI'] - ) - const page = await context.newPage() +async function launchBrowser() { + return playwright.firefox.launch(); +} - const url = generateNMRiumURL() +async function captureSpectraViewAsBase64(nmriumState: Partial, logger: FifoLogger): Promise { + const { data: { spectra } = { spectra: [] }, version } = nmriumState; - await page.goto(url) + if (!spectra?.length) return []; - await page.locator('text=Loading').waitFor({ state: 'hidden' }) + const url = generateNMRiumURL(); + const snapshots: Snapshot[] = []; + let browser = await launchBrowser(); - let snapshots: Snapshot[] = [] + for (const spectrum of spectra) { + let context = null; - for (const spectrum of spectra || []) { - const spectrumObject = { - version, - data: { - spectra: [{ ...spectrum }], - }, - } - - // convert typed array to array - const stringObject = JSON.stringify( - spectrumObject, - (key, value: unknown) => { - return ArrayBuffer.isView(value) - ? 
Array.from(value as unknown as Iterable) - : value + try { + // recreate browser if it has crashed + if (!browser.isConnected()) { + browser = await launchBrowser(); } - ) - // load the spectrum into NMRium using the custom event - await page.evaluate( - ` - window.postMessage({ type: "nmr-wrapper:load", data:{data: ${stringObject},type:"nmrium"}}, '*'); - ` - ) + context = await browser.newContext(playwright.devices['Desktop Chrome HiDPI']); + const page = await context.newPage(); - //wait for NMRium process and load spectra - await page.locator('text=Loading').waitFor({ state: 'hidden' }) + await page.goto(url); + await page.locator('text=Loading').waitFor({ state: 'hidden' }); - // take a snapshot for the spectrum - try { - const snapshot = await page.locator('#nmrSVG .container').screenshot() + const stringObject = JSON.stringify( + { version, data: { spectra: [{ ...spectrum }] } }, + (key, value: unknown) => ArrayBuffer.isView(value) ? Array.from(value as unknown as Iterable) : value + ); + + await page.evaluate(` + window.postMessage({ type: "nmr-wrapper:load", data: { data: ${stringObject}, type: "nmrium" } }, '*'); + `); + + await page.locator('text=Loading').waitFor({ state: 'hidden' }); + + const snapshot = await page.locator('#nmrSVG .container').screenshot(); + snapshots.push({ id: spectrum.id, image: snapshot.toString('base64') }); - snapshots.push({ - image: snapshot.toString('base64'), - id: spectrum.id, - }) } catch (e) { - console.log(e) + logger.error({ id: spectrum.id, stage: 'snapshot', details: toMessage(e) }, 'Failed to capture snapshot for spectrum with id: ' + spectrum.id); + // browser crashed — close and recreate for next spectrum + await browser.close().catch(() => { }); + browser = await launchBrowser(); + + } finally { + await context?.close().catch(() => { }); } } - await context.close() - await browser.close() - - return snapshots - + await browser.close().catch(() => { }); + return snapshots; } + interface ProcessSpectraOptions { 
autoDetection: boolean; autoProcessing: boolean; } -function processSpectra(data: NmriumData, options: ProcessSpectraOptions) { +function processSpectra(data: NmriumData, options: ProcessSpectraOptions, logger: FifoLogger) { const { autoDetection = false, autoProcessing = false } = options for (let index = 0; index < data.spectra.length; index++) { const inputSpectrum = data.spectra[index] const is2D = isSpectrum2D(inputSpectrum); - const spectrum = is2D ? initiateDatum2D(inputSpectrum) : initiateDatum1D(inputSpectrum); + let spectrum = null; + try { + + spectrum = is2D ? initiateDatum2D(inputSpectrum) : initiateDatum1D(inputSpectrum); + } catch (e) { + logger.error({ id: inputSpectrum.id, stage: 'parsing', details: toMessage(e) }, 'Failed to parse spectrum with id: ' + inputSpectrum.id); + continue; + } if (autoProcessing) { - isSpectrum2D(spectrum) ? Filters2DManager.reapplyFilters(spectrum) : Filters1DManager.reapplyFilters(spectrum) + try { + + isSpectrum2D(spectrum) ? Filters2DManager.reapplyFilters(spectrum) : Filters1DManager.reapplyFilters(spectrum) + } catch (e) { + logger.error({ id: inputSpectrum.id, stage: 'processing', details: toMessage(e) }, 'Failed to process spectrum with id: ' + inputSpectrum.id); + } } if (autoDetection && spectrum.info.isFt) { - isSpectrum2D(spectrum) ? detectZones(spectrum) : detectRanges(spectrum); + try { + isSpectrum2D(spectrum) ? 
detectZones(spectrum) : detectRanges(spectrum); + } catch (e) { + logger.error({ id: inputSpectrum.id, stage: 'detection', details: toMessage(e) }, 'Failed to detect spectrum peaks with id: ' + inputSpectrum.id); + } } + if (!spectrum) continue; + data.spectra[index] = spectrum; } } -async function loadSpectrumFromURL(options: RequiredKey) { - const { u: url, s: enableSnapshot = false, p: autoProcessing = false, d: autoDetection = false } = options; +function outputResult(result: any, outputPath?: string) { + const stream = new JsonStreamStringify(result); + + if (outputPath) { + const writeStream = createWriteStream(outputPath); + stream.pipe(writeStream); + writeStream.on('finish', () => { + process.stderr.write(`Output written to: ${outputPath}\n`); + }); + } else { + stream.pipe(process.stdout); + } +} + +async function processAndSerialize( + nmriumState: Partial, + options: FileOptionsArgs, + logger: FifoLogger +) { + const { s: enableSnapshot = false, p: autoProcessing = false, d: autoDetection = false, o, r } = options; + + if (nmriumState.data) { + processSpectra(nmriumState.data, { autoDetection, autoProcessing }, logger); + } + + const images: Snapshot[] = enableSnapshot + ? await captureSpectraViewAsBase64(nmriumState, logger) + : []; + + const { data, version } = core.serializeNmriumState( + nmriumState as NmriumState, + { includeData: r ? 
'rawData' : 'dataSource', }, + + ); + + // include the meta and info object in case of serialize as dataSource + const spectra: any = data?.spectra || []; + if (!r) { + for (let i = 0; i < spectra.length; i++) { + const { info = {}, meta = {} } = nmriumState.data?.spectra[i] || {}; + spectra[i] = { ...spectra[i], info, meta } + } + } + const errors = logger.getLogs({ minLevel: 'error' }) + outputResult({ data, version, images, errors }, o); +} + +async function loadSpectrumFromURL(options: RequiredKey, logger: FifoLogger) { + const { u: url } = options; const { pathname: relativePath, origin: baseURL } = new URL(url) const source = { @@ -137,27 +206,15 @@ async function loadSpectrumFromURL(options: RequiredKey) { baseURL, } - const [nmriumState] = await core.readFromWebSource(source, parsingOptions); - const { - data, version - } = nmriumState; - - - if (data) { - processSpectra(data, { autoDetection, autoProcessing }); - } - let images: Snapshot[] = [] + const [nmriumState] = await core.readFromWebSource(source, { ...parsingOptions, logger }); - if (enableSnapshot) { - images = await captureSpectraViewAsBase64({ data, version }) - } + processAndSerialize(nmriumState, options, logger) - return { data, version, images } } -async function loadSpectrumFromFilePath(options: RequiredKey) { - const { dir: path, s: enableSnapshot = false, p: autoProcessing = false, d: autoDetection = false } = options; +async function loadSpectrumFromFilePath(options: RequiredKey, logger: FifoLogger) { + const { dir: path } = options; const dirPath = isAbsolute(path) ? 
path : join(process.cwd(), path) @@ -166,21 +223,34 @@ async function loadSpectrumFromFilePath(options: RequiredKey +) { + const logger = new FifoLogger(); + + const { u, dir } = argv; + // Handle parsing the spectra file logic based on argv options + if (u) { + loadSpectrumFromURL({ u, ...argv }, logger); } - let images: Snapshot[] = [] - if (enableSnapshot) { - images = await captureSpectraViewAsBase64({ data, version }) + if (dir) { + loadSpectrumFromFilePath({ dir, ...argv }, logger); } - return { data, version, images } + + } -export { loadSpectrumFromFilePath, loadSpectrumFromURL } + + + +export { loadSpectrumFromFilePath, loadSpectrumFromURL, parseSpectra } diff --git a/app/scripts/nmr-cli/src/peaks-to-nmrium.ts b/app/scripts/nmr-cli/src/peaks-to-nmrium.ts new file mode 100644 index 0000000..d61df95 --- /dev/null +++ b/app/scripts/nmr-cli/src/peaks-to-nmrium.ts @@ -0,0 +1,80 @@ +import { peaksToXY } from 'nmr-processing' +import { CURRENT_EXPORT_VERSION } from '@zakodium/nmrium-core' +import type { NMRPeak1D } from '@zakodium/nmr-types' +import { castToArray } from './utilities/castToArray' + +interface PeakInput { + x: number + y?: number + width?: number +} + +interface PeaksToNMRiumOptions { + nucleus?: string + solvent?: string + frequency?: number + from?: number + to?: number + nbPoints?: number +} + +interface PeaksToNMRiumInput { + peaks: PeakInput[] + options?: PeaksToNMRiumOptions +} + +function generateNMRiumFromPeaks(input: PeaksToNMRiumInput) { + const { peaks, options = {} } = input + const { + nucleus = '1H', + solvent = '', + frequency = 400, + from, + to, + nbPoints = 131072, + } = options + + if (!peaks || peaks.length === 0) { + throw new Error('Peaks array is empty or not provided') + } + + const defaultWidth = 1 + const nmrPeaks: NMRPeak1D[] = peaks.map((peak) => ({ + x: peak.x, + y: peak.y ?? 1, + width: peak.width ?? 
defaultWidth, + })) + + const xyOptions: Parameters[1] = { + frequency, + nbPoints, + ...(from !== undefined && { from }), + ...(to !== undefined && { to }), + } + + const { x, y } = peaksToXY(nmrPeaks, xyOptions) + + const info = { + isFid: false, + isComplex: false, + dimension: 1, + nucleus, + originFrequency: frequency, + baseFrequency: frequency, + pulseSequence: '', + solvent, + isFt: true, + name: '', + } + + const spectrum = { + id: crypto.randomUUID(), + data: { x: castToArray(x), im: undefined, re: castToArray(y) }, + info, + } + + return { data: { spectra: [spectrum] }, version: CURRENT_EXPORT_VERSION } +} + +export { generateNMRiumFromPeaks } +export type { PeaksToNMRiumInput, PeakInput, PeaksToNMRiumOptions } diff --git a/app/scripts/nmr-cli/src/prediction/engines/base.ts b/app/scripts/nmr-cli/src/prediction/engines/base.ts new file mode 100644 index 0000000..c60c1bb --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/base.ts @@ -0,0 +1,64 @@ +import type { Options } from 'yargs' +import type { Spectrum } from '@zakodium/nmrium-core' + +/** + * Supported experiment types + */ +export type Experiment = 'proton' | 'carbon' | 'cosy' | 'hsqc' | 'hmbc' + +/** + * Nucleus types used in NMR + */ +export type Nucleus = '1H' | '13C' + +/** + * Map from experiment name to nucleus + */ +export const experimentToNucleus: Record = { + proton: '1H', + carbon: '13C', +} + +/** + * Base interface that all engines must implement + */ +export interface Engine { + /** Unique engine identifier (e.g., 'nmrdb.org') */ + readonly id: string + + readonly name: string + readonly description: string + readonly supportedSpectra: readonly Experiment[] + + /** Command-line options specific to this engine */ + readonly options: Record + + /** List of required option keys */ + readonly requiredOptions: readonly string[] + + /** + * Build the payload options for the API request + * @param argv - Command line arguments + * @returns Options object to send in the API payload 
+ */ + buildPayloadOptions(argv: Record): any + + /** + * Predict and generate spectra + * This is the main entry point for prediction + * @param structure - MOL file content + * @param options - Command line options + * @returns Array of generated spectra + */ + predict( + structure: string, + options: Record, + ): Promise + + /** + * Optional: Custom validation beyond required options + * @param argv - Command line arguments + * @returns true if valid, error message if invalid + */ + validate?(argv: Record): true | string +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/engines/index.ts b/app/scripts/nmr-cli/src/prediction/engines/index.ts new file mode 100644 index 0000000..c1bfe8c --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/index.ts @@ -0,0 +1,5 @@ +import './nmrdb/nmrdb.engine' +import './nmrshift/nmrshift.engine' + +export { engineRegistry } from './registry'; +export type { Engine } from './base'; diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/checkFromTo.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/checkFromTo.ts new file mode 100644 index 0000000..1720c0c --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/checkFromTo.ts @@ -0,0 +1,91 @@ +import { xMinMaxValues } from "ml-spectra-processing" +import { Experiment } from "../../base" +import { isProton } from "../../../../utilities/isProton" +import { Prediction1D, Prediction2D } from "nmr-processing" +import { PredictedSpectraResult, PredictionOptions } from "../nmrdb.engine" + +export function checkFromTo( + predictedSpectra: PredictedSpectraResult, + inputOptions: PredictionOptions, +) { + const setFromTo = (inputOptions: any, nucleus: any, fromTo: any) => { + inputOptions['1d'][nucleus].to = fromTo.to + inputOptions['1d'][nucleus].from = fromTo.from + if (fromTo.signalsOutOfRange) { + signalsOutOfRange[nucleus] = true + } + } + + const { autoExtendRange, spectra } = inputOptions + const 
signalsOutOfRange: Record = {} + + for (const exp in predictedSpectra) { + const experiment = exp as Experiment + if (!spectra[experiment]) continue + if (predictedSpectra[experiment]?.signals.length === 0) continue + + if (['carbon', 'proton'].includes(experiment)) { + const spectrum = predictedSpectra[experiment] as Prediction1D + const { signals, nucleus } = spectrum + const { from, to } = (inputOptions['1d'] as any)[nucleus] + const fromTo = getNewFromTo({ + deltas: signals.map((s) => s.delta), + from, + to, + nucleus, + autoExtendRange, + }) + setFromTo(inputOptions, nucleus, fromTo) + } else { + const { signals, nuclei } = predictedSpectra[experiment] as Prediction2D + for (const nucleus of nuclei) { + const axis = isProton(nucleus) ? 'x' : 'y' + const { from, to } = (inputOptions['1d'] as any)[nucleus] + const fromTo = getNewFromTo({ + deltas: signals.map((s) => s[axis].delta), + from, + to, + nucleus, + autoExtendRange, + }) + setFromTo(inputOptions, nucleus, fromTo) + } + } + } + + for (const nucleus of ['1H', '13C']) { + if (signalsOutOfRange[nucleus]) { + const { from, to } = (inputOptions['1d'] as any)[nucleus] + if (autoExtendRange) { + console.log( + `There are ${nucleus} signals out of the range, it was extended to ${from}-${to}.`, + ) + } else { + console.log(`There are ${nucleus} signals out of the range.`) + } + } + } +} + + + +function getNewFromTo(params: { + deltas: number[] + from: number + to: number + nucleus: string + autoExtendRange: boolean +}) { + const { deltas, nucleus, autoExtendRange } = params + let { from, to } = params + const { min, max } = xMinMaxValues(deltas) + const signalsOutOfRange = from > min || to < max + + if (autoExtendRange && signalsOutOfRange) { + const spread = isProton(nucleus) ? 
0.2 : 2 + if (from > min) from = min - spread + if (to < max) to = max + spread + } + + return { from, to, signalsOutOfRange } +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/generateName.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/generateName.ts new file mode 100644 index 0000000..6c4f623 --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/generateName.ts @@ -0,0 +1,8 @@ +export function generateName( + name: string, + options: { frequency: number | number[]; experiment: string }, +) { + const { frequency, experiment } = options + const freq = Array.isArray(frequency) ? frequency[0] : frequency + return name || `${experiment.toUpperCase()}_${freq}MHz` +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/generated1DSpectrum.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/generated1DSpectrum.ts new file mode 100644 index 0000000..fe62616 --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/generated1DSpectrum.ts @@ -0,0 +1,71 @@ +import { getRelativeFrequency, mapRanges, signalsToRanges, signalsToXY, updateIntegralsRelativeValues } from "nmr-processing" +import { generateName } from "./generateName" +import { initiateDatum1D } from "../../../../parse/data/data1D/initiateDatum1D" +import { PredictionOptions } from "../nmrdb.engine" + +export function generated1DSpectrum(params: { + options: PredictionOptions + spectrum: any + experiment: string + color: string +}) { + const { spectrum, options, experiment, color } = params + const { signals, joinedSignals, nucleus } = spectrum + + const { + name, + '1d': { nbPoints, lineWidth }, + frequency: freq, + } = options + + const SpectrumName = generateName(name, { frequency: freq, experiment }) + const frequency = getRelativeFrequency(nucleus, { + frequency: freq, + nucleus, + }) + + const { x, y } = signalsToXY(signals, { + ...(options['1d'] as any)[nucleus], + 
frequency, + nbPoints, + lineWidth, + }) + + const first = x[0] ?? 0 + const last = x.at(-1) ?? 0 + const getFreqOffset = (freq: any) => { + return (first + last) * freq * 0.5 + } + + const datum = initiateDatum1D( + { + data: { x, im: null, re: y }, + display: { color }, + info: { + nucleus, + originFrequency: frequency, + baseFrequency: frequency, + frequencyOffset: Array.isArray(frequency) + ? frequency.map(getFreqOffset) + : getFreqOffset(frequency), + pulseSequence: 'prediction', + spectralWidth: Math.abs(first - last), + solvent: '', + experiment, + isFt: true, + name: SpectrumName, + title: SpectrumName, + }, + }, + {}, + ) + + datum.ranges.values = mapRanges( + signalsToRanges(joinedSignals, { frequency }), + datum, + ) + updateIntegralsRelativeValues(datum) + + return datum +} + diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/generated2DSpectrum.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/generated2DSpectrum.ts new file mode 100644 index 0000000..c308f1b --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/generated2DSpectrum.ts @@ -0,0 +1,60 @@ +import { calculateRelativeFrequency, PredictionBase2D, signals2DToZ } from "nmr-processing" +import { generateName } from "./generateName" +import { getSpectralWidth } from "./getSpectralWidth" +import { initiateDatum2D } from "../../../../parse/data/data2d/initiateDatum2D" +import { adjustAlpha } from "../../../../utilities/adjustAlpha" +import { mapZones } from "./mapZones" +import { PredictionOptions } from "../nmrdb.engine" + +export function generated2DSpectrum(params: { + options: PredictionOptions + spectrum: PredictionBase2D + experiment: string + color: string +}) { + const { spectrum, options, experiment, color } = params + const { signals, zones, nuclei } = spectrum + const xOption = (options['1d'] as any)[nuclei[0]] + const yOption = (options['1d'] as any)[nuclei[1]] + + const width = nuclei[0] === nuclei[1] ? 
0.02 : { x: 0.02, y: 0.2133 } + const frequency = calculateRelativeFrequency(nuclei, options.frequency) + + const minMaxContent = signals2DToZ(signals, { + from: { x: xOption.from, y: yOption.from }, + to: { x: xOption.to, y: yOption.to }, + nbPoints: { + x: options['2d'].nbPoints.x, + y: options['2d'].nbPoints.y, + }, + width, + factor: 3, + }) + + const SpectrumName = generateName(options.name, { + frequency, + experiment, + }) + + const spectralWidth = getSpectralWidth(experiment, options) + const datum = initiateDatum2D({ + data: { rr: { ...minMaxContent, noise: 0.01 } }, + display: { + positiveColor: color, + negativeColor: adjustAlpha(color, 40), + }, + info: { + name: SpectrumName, + title: SpectrumName, + nucleus: nuclei, + originFrequency: frequency, + baseFrequency: frequency, + pulseSequence: 'prediction', + spectralWidth, + experiment, + }, + }) + + datum.zones.values = mapZones(zones) + return datum +} diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/getSpectralWidth.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/getSpectralWidth.ts new file mode 100644 index 0000000..305fbf5 --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/getSpectralWidth.ts @@ -0,0 +1,23 @@ +import { PredictionOptions } from "../nmrdb.engine" + +export function getSpectralWidth(experiment: string, options: PredictionOptions) { + const formTo = options['1d'] + + switch (experiment) { + case 'cosy': { + const { from, to } = formTo['1H'] + const diff = to - from + return [diff, diff] + } + case 'hsqc': + case 'hmbc': { + const proton = formTo['1H'] + const carbon = formTo['13C'] + const protonDiff = proton.to - proton.from + const carbonDiff = carbon.to - carbon.from + return [protonDiff, carbonDiff] + } + default: + return [] + } +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/mapZones.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/mapZones.ts new file mode 100644 index 
0000000..338d790 --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/core/mapZones.ts @@ -0,0 +1,30 @@ +import { Peak2D, Signal2D, Zone } from "@zakodium/nmr-types" +import { NMRZone } from "nmr-processing" + +export function mapZones(zones: NMRZone[]): Zone[] { + return zones.map((zone): Zone => { + const { signals, ...resZone } = zone + const newSignals = signals.map((signal): Signal2D => { + const { x, y, id, peaks, kind, ...resSignal } = signal + return { + ...resSignal, + id: id || crypto.randomUUID(), + kind: kind || 'signal', + x: { ...x, originalDelta: x.delta || 0 }, + y: { ...y, originalDelta: y.delta || 0 }, + peaks: peaks?.map( + (peak): Peak2D => ({ + ...peak, + id: peak.id || crypto.randomUUID(), + }), + ), + } + }) + return { + ...resZone, + id: crypto.randomUUID(), + signals: newSignals, + kind: 'signal', + } + }) +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrdb/nmrdb.engine.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/nmrdb.engine.ts new file mode 100644 index 0000000..ea7b90b --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrdb/nmrdb.engine.ts @@ -0,0 +1,227 @@ +import type { Options } from 'yargs' +import type { Spectrum } from '@zakodium/nmrium-core' +import type { + Predicted, + PredictionBase1D, + PredictionBase2D, + PredictionOptionsByExperiment, +} from 'nmr-processing' +import { predict } from 'nmr-processing' +import { Molecule } from 'openchemlib' + +import { defineEngine } from '../registry' +import type { Experiment } from '../base' +import { checkFromTo } from './core/checkFromTo' +import { generated1DSpectrum } from './core/generated1DSpectrum' +import { generated2DSpectrum } from './core/generated2DSpectrum' +export type PredictedSpectraResult = Partial< + Record +> + +export interface PredictionOptions { + name: string + frequency: number + '1d': { + '1H': { from: number; to: number } + '13C': { from: number; to: number } + nbPoints: number + 
lineWidth: number + } + '2d': { + nbPoints: { x: number; y: number } + } + autoExtendRange: boolean + spectra: Record +} + +// ============================================================================ +// Map experiment names to prediction keys +// ============================================================================ + +const experimentToPredictKey: Record = { + proton: 'H', + carbon: 'C', +} + +// ============================================================================ +// HELPER FUNCTIONS +// ============================================================================ + +export async function predictSpectra( + molfile: string, + spectra: Record, +): Promise { + const molecule = Molecule.fromMolfile(molfile) + + const predictOptions: Record = {} + for (const [experiment, enabled] of Object.entries(spectra)) { + if (!enabled) continue + const key = experimentToPredictKey[experiment] ?? experiment + predictOptions[key] = {} + } + + return predict(molecule, { predictOptions }) +} + +export function generateSpectra( + predictedSpectra: PredictedSpectraResult, + options: PredictionOptions, + color: string, +): Spectrum[] { + const clonedOptions = structuredClone(options) + checkFromTo(predictedSpectra, clonedOptions) + + const spectra: Spectrum[] = [] + + for (const [experiment, spectrum] of Object.entries(predictedSpectra)) { + if (!clonedOptions.spectra[experiment as Experiment]) continue + + switch (experiment) { + case 'proton': + case 'carbon': { + spectra.push( + generated1DSpectrum({ spectrum, options: clonedOptions, experiment, color }), + ) + break + } + case 'cosy': + case 'hsqc': + case 'hmbc': { + spectra.push( + generated2DSpectrum({ + spectrum: spectrum as PredictionBase2D, + options: clonedOptions, + experiment, + color, + }), + ) + break + } + default: + break + } + } + + return spectra +} + +// ============================================================================ +// ENGINE DEFINITION +// 
============================================================================ + +export const nmrdbEngine = defineEngine({ + id: 'nmrdb.org', + name: 'NMRDB.org', + description: 'NMRDB.org prediction engine with 1D and 2D NMR support', + supportedSpectra: ['proton', 'carbon', 'cosy', 'hmbc', 'hsqc'], + + options: { + name: { + type: 'string', + description: 'Compound name', + default: '', + }, + frequency: { + type: 'number', + description: 'NMR frequency (MHz)', + default: 400, + }, + protonFrom: { + type: 'number', + description: 'Proton (1H) from in ppm', + default: -1, + }, + protonTo: { + type: 'number', + description: 'Proton (1H) to in ppm', + default: 12, + }, + carbonFrom: { + type: 'number', + description: 'Carbon (13C) from in ppm', + default: -5, + }, + carbonTo: { + type: 'number', + description: 'Carbon (13C) to in ppm', + default: 220, + }, + nbPoints1d: { + type: 'number', + description: '1D number of points', + default: 2 ** 17, + }, + lineWidth: { + type: 'number', + description: '1D line width', + default: 1, + }, + nbPoints2dX: { + type: 'number', + description: '2D spectrum X-axis points', + default: 1024, + }, + nbPoints2dY: { + type: 'number', + description: '2D spectrum Y-axis points', + default: 1024, + }, + autoExtendRange: { + type: 'boolean', + description: 'Auto extend range', + default: true, + }, + } as Record, + + requiredOptions: [], + + buildPayloadOptions(argv: Record): PredictionOptions { + const spectraObj: Record = { + carbon: false, + proton: false, + cosy: false, + hmbc: false, + hsqc: false, + } + + for (const experiment of argv.spectra as string[]) { + spectraObj[experiment as Experiment] = true + } + + return { + name: (argv.name as string) || '', + frequency: (argv.frequency as number) || 400, + '1d': { + '1H': { + from: (argv.protonFrom as number) ?? -1, + to: (argv.protonTo as number) ?? 12, + }, + '13C': { + from: (argv.carbonFrom as number) ?? -5, + to: (argv.carbonTo as number) ?? 
220, + }, + nbPoints: (argv.nbPoints1d as number) || 2 ** 17, + lineWidth: (argv.lineWidth as number) || 1, + }, + '2d': { + nbPoints: { + x: (argv.nbPoints2dX as number) || 1024, + y: (argv.nbPoints2dY as number) || 1024, + }, + }, + spectra: spectraObj, + autoExtendRange: argv.autoExtendRange !== false, + } + }, + + async predict(structure, options) { + const predictionOptions = this.buildPayloadOptions(options) + + const { spectra } = await predictSpectra( + structure, + predictionOptions.spectra, + ) + + return generateSpectra(spectra, predictionOptions, 'red') + }, +}) \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/extractInfoFromSpectra.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/extractInfoFromSpectra.ts new file mode 100644 index 0000000..56a666d --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/extractInfoFromSpectra.ts @@ -0,0 +1,15 @@ +import { Experiment } from "../../base"; +import { spectraTypeMap, SpectraTypeMapItem } from "./spectraTypeMap"; + + + +export function extractInfoFromSpectra(spectra: Experiment[]) { + const info: SpectraTypeMapItem[] = []; + for (const experiment of spectra) { + const data = spectraTypeMap[experiment]; + if (!data) continue; + + info.push(data) + } + return info; +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/generatePredictedSpectrumData.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/generatePredictedSpectrumData.ts similarity index 100% rename from app/scripts/nmr-cli/src/prediction/generatePredictedSpectrumData.ts rename to app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/generatePredictedSpectrumData.ts diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/getNucleusFromSpectra.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/getNucleusFromSpectra.ts new file mode 100644 index 0000000..c23bffb --- /dev/null +++ 
b/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/getNucleusFromSpectra.ts @@ -0,0 +1,13 @@ +import { Experiment } from "../../base" +import { spectraTypeMap } from "./spectraTypeMap" + +export function getNucleusFromSpectra(spectra: Experiment[]): string { + const nuclei = new Set() + for (const spectrum of spectra) { + const entry = spectraTypeMap[spectrum] + if (entry) { + nuclei.add(entry.nucleus) + } + } + return nuclei.size > 0 ? [...nuclei].join(',') : '1H' +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/spectraTypeMap.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/spectraTypeMap.ts new file mode 100644 index 0000000..729345f --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrshift/core/spectraTypeMap.ts @@ -0,0 +1,12 @@ +import { Experiment } from "../../base"; + +export interface SpectraTypeMapItem { + type: string; nucleus: string +} + +export const spectraTypeMap: Partial> = { + proton: { type: 'nmr;1H;1d', nucleus: '1H' }, + carbon: + + { type: 'nmr;13C;1d', nucleus: '13C' }, +} diff --git a/app/scripts/nmr-cli/src/prediction/engines/nmrshift/nmrshift.engine.ts b/app/scripts/nmr-cli/src/prediction/engines/nmrshift/nmrshift.engine.ts new file mode 100644 index 0000000..46788eb --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/nmrshift/nmrshift.engine.ts @@ -0,0 +1,264 @@ +import type { Options } from 'yargs' +import type { Spectrum } from '@zakodium/nmrium-core' +import https from 'https' +import axios from 'axios' + +import { defineEngine } from '../registry' +import type { Experiment } from '../base' +import type { GenerateSpectrumOptions, ShiftsItem } from './core/generatePredictedSpectrumData' +import { generatePredictedSpectrumData } from './core/generatePredictedSpectrumData' +import { extractInfoFromSpectra } from './core/extractInfoFromSpectra' +import type { SpectraTypeMapItem } from './core/spectraTypeMap' + +interface NMRShiftPayload { + 
id: number + type: string + shifts: string + solvent: string +} + +interface PredictionResponseItem { + id: number + type: string + statistics: { + accept: number + warning: number + reject: number + missing: number + total: number + } + shifts: ShiftsItem[] +} + +interface PredictionResponse { + result: PredictionResponseItem[] +} + +interface NMRShiftOptions { + id: number + shifts: string + solvent: string + spectra: Experiment[] +} + +type PredictionArgs = NMRShiftOptions & GenerateSpectrumOptions + +// ============================================================================ +// HELPERS +// ============================================================================ + +function getBaseUrl(): string { + const url = process.env['NMR_PREDICTION_URL'] + if (!url) { + throw new Error('Environment variable NMR_PREDICTION_URL is not defined.') + } + try { + new URL(url) + } catch { + throw new Error(`Invalid URL in NMR_PREDICTION_URL: "${url}"`) + } + return url +} + +async function callPredict( + structure: string, + options: { + id: number + shifts: string + solvent: string + experiments: SpectraTypeMapItem[] + }, +): Promise[]> { + const url = getBaseUrl() + const { id, shifts, solvent, experiments } = options + + const httpsAgent = new https.Agent({ rejectUnauthorized: false }) + + const requests = experiments.map((experimentInfo) => { + const payload: NMRShiftPayload = { + id, + type: experimentInfo.type, + shifts, + solvent, + } + + return axios.post(url, { + inputs: [payload], + moltxt: structure, + }, { + headers: { 'Content-Type': 'application/json' }, + httpsAgent, + }) + }) + + return Promise.all(requests) +} + +async function predictNMR( + structure: string, + options: PredictionArgs, +): Promise { + const { + id = 1, + shifts = '1', + solvent = 'Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)', + from, + to, + nbPoints = 1024, + frequency = 400, + lineWidth = 1, + tolerance = 0.001, + peakShape = 'lorentzian', + spectra, + } = options + + // Derive 
experiments (type + nucleus) from the --spectra list + const experiments = extractInfoFromSpectra(spectra) + + if (experiments.length === 0) { + throw new Error( + `No supported experiments found for spectra [${spectra.join(', ')}]. ` + + `Supported: proton, carbon.`, + ) + } + + const results = await callPredict(structure, { id, shifts, solvent, experiments }) + + const outputSpectra: Spectrum[] = [] + + for (let i = 0; i < results.length; i++) { + const response: PredictionResponse = results[i].data + const experimentInfo = experiments[i] + + for (const item of response.result) { + const data = generatePredictedSpectrumData(item.shifts, { + from, + to, + nbPoints, + lineWidth, + frequency, + tolerance, + peakShape, + }) + + if (!data) continue + + const name = crypto.randomUUID() + + outputSpectra.push({ + id: crypto.randomUUID(), + data, + info: { + isFid: false, + isComplex: false, + dimension: 1, + originFrequency: frequency, + baseFrequency: frequency, + pulseSequence: '', + solvent, + isFt: true, + name, + nucleus: experimentInfo.nucleus, + }, + } as unknown as Spectrum) + } + } + + return outputSpectra +} + +// ============================================================================ +// ENGINE DEFINITION +// ============================================================================ + +const SOLVENT_CHOICES = [ + 'Any', + 'Chloroform-D1 (CDCl3)', + 'Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)', + 'Methanol-D4 (CD3OD)', + 'Deuteriumoxide (D2O)', + 'Acetone-D6 ((CD3)2CO)', + 'TETRACHLORO-METHANE (CCl4)', + 'Pyridin-D5 (C5D5N)', + 'Benzene-D6 (C6D6)', + 'neat', + 'Tetrahydrofuran-D8 (THF-D8, C4D4O)', +] as const + +export const nmrshiftEngine = defineEngine({ + id: 'nmrshift', + name: 'NMRShift', + description: 'NMRShift prediction engine', + + supportedSpectra: ['proton', 'carbon'], + + options: { + id: { + alias: 'i', + type: 'number', + description: 'Input ID', + default: 1, + }, + shifts: { + type: 'string', + description: 'Chemical shifts', + default: 
'1', + }, + solvent: { + type: 'string', + description: 'NMR solvent', + default: 'Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)', + choices: SOLVENT_CHOICES, + }, + from: { + type: 'number', + description: 'From in (ppm) for spectrum generation', + }, + to: { + type: 'number', + description: 'To in (ppm) for spectrum generation', + }, + nbPoints: { + type: 'number', + description: 'Number of points for spectrum generation', + default: 1024, + }, + lineWidth: { + type: 'number', + description: 'Line width for spectrum generation', + default: 1, + }, + frequency: { + type: 'number', + description: 'NMR frequency (MHz) for spectrum generation', + default: 400, + }, + tolerance: { + type: 'number', + description: 'Tolerance to group peaks with close shift', + default: 0.001, + }, + peakShape: { + alias: 'ps', + type: 'string', + description: 'Peak shape algorithm', + default: 'lorentzian', + choices: ['gaussian', 'lorentzian'], + }, + } as Record, + + requiredOptions: ['solvent'], + + buildPayloadOptions(argv: Record): NMRShiftOptions { + return { + id: (argv.id as number) ?? 1, + shifts: (argv.shifts as string) ?? '1', + solvent: (argv.solvent as string) ?? 
'Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)', + spectra: argv.spectra as Experiment[], + } + }, + + async predict(structure, options) { + return predictNMR(structure, options as unknown as PredictionArgs) + }, +}) \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/engines/registry.ts b/app/scripts/nmr-cli/src/prediction/engines/registry.ts new file mode 100644 index 0000000..c6ea442 --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/engines/registry.ts @@ -0,0 +1,60 @@ +import type { Engine } from './base' + +/** + * Auto-discovered engine registry + * Engines are automatically registered when imported + */ +class EngineRegistry { + private engines = new Map() + + /** + * Register an engine + * Called automatically when engine files are imported + */ + register(engine: Engine): void { + if (this.engines.has(engine.id)) { + console.warn(`Engine ${engine.id} is already registered, overwriting...`) + } + this.engines.set(engine.id, engine) + } + + /** + * Get an engine by ID + */ + get(id: string): Engine | undefined { + return this.engines.get(id) + } + + /** + * Get all registered engines + */ + getAll(): Engine[] { + return Array.from(this.engines.values()) + } + + /** + * Get all engine IDs + */ + getIds(): string[] { + return Array.from(this.engines.keys()) + } + + /** + * Check if an engine exists + */ + has(id: string): boolean { + return this.engines.has(id) + } +} + +// Singleton instance +export const engineRegistry = new EngineRegistry() + +/** + * Helper function to create and auto-register an engine + * Just call this at the bottom of your engine file! 
+ */ +export function defineEngine(engine: Engine): Engine { + engineRegistry.register(engine) + return engine +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/index.ts b/app/scripts/nmr-cli/src/prediction/index.ts new file mode 100644 index 0000000..4041a77 --- /dev/null +++ b/app/scripts/nmr-cli/src/prediction/index.ts @@ -0,0 +1,251 @@ +import { Argv, CommandModule } from 'yargs' +import { readFileSync, existsSync, writeFileSync } from 'fs' +import { CURRENT_EXPORT_VERSION } from '@zakodium/nmrium-core' +import { engineRegistry } from './engines' +import type { Experiment } from './engines/base' + +// ============================================================================ +// STRUCTURE INPUT HANDLING +// ============================================================================ + +/** + * Resolves structure input from multiple sources: + * 1. File path (if file exists) + * 2. Stdin (if --stdin flag is used) + * 3. Inline MOL content (as fallback) + */ +function resolveStructureInput(options: { + structure?: string + stdin?: boolean + file?: string +}): string { + // Priority 1: Explicit file flag + if (options.file) { + if (!existsSync(options.file)) { + throw new Error(`File not found: ${options.file}`) + } + return readFileSync(options.file, 'utf-8') + } + + // Priority 2: Explicit stdin flag + if (options.stdin) { + return readStdinSync() + } + + // Priority 3: Structure argument (-s flag) - ALWAYS treat as inline content + if (options.structure) { + // Do NOT check existsSync here - just return it as inline MOL content + return options.structure.trimEnd().replaceAll(/\\n/g, '\n') + } + + throw new Error('No structure input provided. 
Use --file, --stdin, or -s') +} + +/** + * Synchronously read from stdin + * This works because yargs has already parsed args, so stdin is available + */ +function readStdinSync(): string { + try { + // File descriptor 0 is stdin + return readFileSync(0, 'utf-8') + } catch (error) { + throw new Error('Failed to read from stdin. Is data being piped?') + } +} + +// ============================================================================ +// COMMON OPTIONS +// ============================================================================ + +const commonOptions = { + engine: { + alias: 'e', + type: 'string', + description: 'Prediction engine', + demandOption: true, + choices: engineRegistry.getIds(), + }, + spectra: { + type: 'array', + description: 'Spectra types to predict', + demandOption: true, + choices: ['proton', 'carbon', 'cosy', 'hsqc', 'hmbc'], + }, + // Option 1: File path (most explicit) + file: { + alias: 'f', + type: 'string', + description: 'Path to MOL file', + conflicts: ['stdin', 'structure'], + }, + // Option 2: Stdin flag + stdin: { + type: 'boolean', + description: 'Read structure from stdin', + conflicts: ['file', 'structure'], + }, + // Option 3: Structure argument (inline MOL content only) + structure: { + alias: 's', + type: 'string', + description: 'Inline MOL content (use --file for file paths)', + conflicts: ['file', 'stdin'], + }, + output: { + alias: 'o', + type: 'string', + description: 'Output file path (default: stdout)', + } +} as const + +// ============================================================================ +// VALIDATION +// ============================================================================ + +function validateEngineOptions(argv: Record): void { + const engineId = argv.engine as string + const spectra = argv.spectra as string[] + const engine = engineRegistry.get(engineId) + + if (!engine) { + const available = engineRegistry.getIds().join(', ') + throw new Error(`Unknown engine "${engineId}". 
Available engines: ${available}`) + } + + const unsupportedSpectra = spectra.filter( + (s) => !engine.supportedSpectra.includes(s as Experiment), + ) + + if (unsupportedSpectra.length > 0) { + throw new Error( + `Engine "${engineId}" does not support: ${unsupportedSpectra.join(', ')}.\n` + + `Supported spectra: ${engine.supportedSpectra.join(', ')}`, + ) + } + + const missing = engine.requiredOptions.filter((opt) => !argv[opt]) + if (missing.length > 0) { + throw new Error( + `Engine "${engineId}" requires: ${missing.join(', ')}\n` + + `Usage: --${missing.join(' --')}`, + ) + } + + if (engine.validate) { + const result = engine.validate(argv) + if (result !== true) { + throw new Error(result) + } + } +} + +// ============================================================================ +// MAIN PREDICTION FUNCTION +// ============================================================================ + +async function predictNMR(options: Record): Promise { + const engineId = options.engine as string + const engine = engineRegistry.get(engineId)! + + // Resolve structure from input + const structure = resolveStructureInput({ + structure: options.structure as string | undefined, + stdin: options.stdin as boolean | undefined, + file: options.file as string | undefined, + }) + + // DEBUG LOGGING + console.error('[DEBUG] Received structure:', structure ? 
`${structure.length} chars` : 'undefined') + console.error('[DEBUG] Structure type:', typeof structure) + console.error('[DEBUG] Structure preview:', structure?.substring(0, 100)) + + // Validate structure is not empty + if (!structure || !structure.trim()) { + throw new Error('Structure input is empty or undefined') + } + + // Run prediction + const spectraResults = await engine.predict(structure, options) + + // Build NMRium output + const nmrium = { + data: { spectra: spectraResults }, + version: CURRENT_EXPORT_VERSION, + } + const output = JSON.stringify(nmrium) + + // Handle output destination + if (options.output) { + const outputPath = options.output as string + writeFileSync(outputPath, output, 'utf-8') + console.error(`Results written to ${outputPath}`) + } + + return output +} + +// ============================================================================ +// COMMAND MODULE +// ============================================================================ + +export const parsePredictionCommand: CommandModule<{}, Record> = { + command: ['predict', 'p'], + describe: 'Predict NMR spectrum from mol text', + builder: (yargs: Argv): Argv => { + let y = yargs.options(commonOptions) + + for (const engine of engineRegistry.getAll()) { + y = y.options(engine.options) as Argv + } + + return y + .check((argv) => { + // Ensure at least one input method is provided + if (!argv.file && !argv.stdin && !argv.structure) { + throw new Error( + 'Must provide structure input via --file, --stdin, or -s' + ) + } + return true + }) + .example( + '$0 predict -e myengine --spectra proton -f molecule.mol', + 'Predict from file path' + ) + .example( + '$0 predict -e myengine --spectra proton --stdin < molecule.mol', + 'Predict from stdin (redirect)' + ) + .example( + 'cat molecule.mol | $0 predict -e myengine --spectra proton --stdin', + 'Predict from stdin (pipe)' + ) + .example( + '$0 predict -e myengine --spectra proton -s "\\n MOL content..."', + 'Predict using -s with inline 
MOL content' + ) + .example( + '$0 predict -e myengine --spectra proton -f mol.mol -o results.json', + 'Save output to file' + ) + }, + handler: async (argv) => { + // DEBUG: See ALL arguments + console.error('[DEBUG] Full argv:', JSON.stringify(argv, null, 2)) + console.error('[DEBUG] argv.structure exists?', argv.structure !== undefined) + console.error('[DEBUG] argv.engine:', argv.engine) + console.error('[DEBUG] argv.spectra:', argv.spectra) + + try { + validateEngineOptions(argv) + const output = await predictNMR(argv) + console.log(output) + } catch (error) { + console.error('Error:', error instanceof Error ? error.message : String(error)) + console.error('Error stack:', error instanceof Error ? error.stack : '') + process.exit(1) + } + + }, +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/prediction/parsePredictionCommand.ts b/app/scripts/nmr-cli/src/prediction/parsePredictionCommand.ts deleted file mode 100644 index 4387a18..0000000 --- a/app/scripts/nmr-cli/src/prediction/parsePredictionCommand.ts +++ /dev/null @@ -1,248 +0,0 @@ -import { Argv, CommandModule, Options } from 'yargs' -import { - generatePredictedSpectrumData, - GenerateSpectrumOptions, - ShiftsItem, -} from './generatePredictedSpectrumData' -import { CURRENT_EXPORT_VERSION } from '@zakodium/nmrium-core' - -import https from 'https' -import axios from 'axios' - -interface PredictionParameters { - molText: string - id: number - type: string - shifts: string - solvent: string - nucleus: string -} - -const predictionOptions: { [key in keyof GenerateSpectrumOptions]: Options } = { - from: { - type: 'number', - description: 'From in (ppm)', - }, - to: { - type: 'number', - description: 'To in (ppm)', - }, - nbPoints: { - type: 'number', - description: 'Number of points', - default: 2 ** 18, // 256k points - }, - lineWidth: { - type: 'number', - description: 'Line width', - default: 1, - }, - frequency: { - type: 'number', - description: 'NMR frequency (MHz)', - default: 400, - 
}, - tolerance: { - type: 'number', - description: 'Tolerance', - default: 0.001, - }, - peakShape: { - alias: 'ps', - type: 'string', - description: 'Peak shape algorithm', - default: 'lorentzian', - choices: ['gaussian', 'lorentzian'], - }, -} as const - -const nmrOptions: { [key in keyof PredictionParameters]: Options } = { - id: { - alias: 'i', - type: 'number', - description: 'Input ID', - default: 1, - }, - type: { - alias: 't', - type: 'string', - description: 'NMR type', - default: 'nmr;1H;1d', - choices: ['nmr;1H;1d', 'nmr;13C;1d'], - }, - shifts: { - alias: 's', - type: 'string', - description: 'Chemical shifts', - default: '1', - }, - solvent: { - type: 'string', - description: 'NMR solvent', - default: 'Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)', - choices: [ - 'Any', - 'Chloroform-D1 (CDCl3)', - 'Dimethylsulphoxide-D6 (DMSO-D6, C2D6SO)', - 'Methanol-D4 (CD3OD)', - 'Deuteriumoxide (D2O)', - 'Acetone-D6 ((CD3)2CO)', - 'TETRACHLORO-METHANE (CCl4)', - 'Pyridin-D5 (C5D5N)', - 'Benzene-D6 (C6D6)', - 'neat', - 'Tetrahydrofuran-D8 (THF-D8, C4D4O)', - ], - }, - molText: { - alias: 'm', - type: 'string', - description: 'MOL file content', - requiresArg: true, - }, - nucleus: { - alias: 'n', - type: 'string', - description: 'Predicted nucleus', - requiresArg: true, - choices: ['1H', '13C'], - }, -} as const - -interface PredictionResponseItem { - id: number - type: string - statistics: { - accept: number - warning: number - reject: number - missing: number - total: number - } - shifts: ShiftsItem[] -} -interface PredictionResponse { - result: PredictionResponseItem[] -} - -async function predictNMR(options: PredictionArgs): Promise { - const url = process.env['NMR_PREDICTION_URL'] - - if (!url) { - throw new Error('Environment variable NMR_PREDICTION_URL is not defined.') - } - - try { - new URL(url).toString() - } catch { - throw new Error(`Invalid URL in NMR_PREDICTION_URL: "${url}"`) - } - - try { - const { - id, - type, - shifts, - solvent, - from, - to, - 
nbPoints = 2 ** 18, // 256K - frequency = 400, - lineWidth = 1, - tolerance = 0.001, - molText, - nucleus, - peakShape = 'lorentzian', - } = options - - const payload: any = { - inputs: [ - { - id, - type, - shifts, - solvent, - }, - ], - moltxt: molText.replaceAll(/\\n/g, '\n'), - } - - const httpsAgent = new https.Agent({ - rejectUnauthorized: false, - }) - - // Axios POST request with httpsAgent - const response = await axios.post(url, payload, { - headers: { - 'Content-Type': 'application/json', - }, - httpsAgent, - }) - - const responseResult: PredictionResponse = response.data - const spectra = [] - - for (const result of responseResult.result) { - const name = crypto.randomUUID() - const data = generatePredictedSpectrumData(result.shifts, { - from, - to, - nbPoints, - lineWidth, - frequency, - tolerance, - peakShape, - }) - - const info = { - isFid: false, - isComplex: false, - dimension: 1, - originFrequency: frequency, - baseFrequency: frequency, - pulseSequence: '', - solvent, - isFt: true, - name, - nucleus, - } - - spectra.push({ - id: crypto.randomUUID(), - data, - info, - }) - } - - const nmrium = { data: { spectra }, version: CURRENT_EXPORT_VERSION } - console.log(JSON.stringify(nmrium, null, 2)) - } catch (error) { - console.error( - 'Error:', - error instanceof Error ? 
error.message : String(error) - ) - - if (axios.isAxiosError(error) && error.response) { - console.error('Response data:', error.response.data) - } else if (error instanceof Error && error.cause) { - console.error('Network Error:', error.cause) - } - } -} - -type PredictionArgs = PredictionParameters & GenerateSpectrumOptions - -// Define the prediction string command -export const parsePredictionCommand: CommandModule<{}, PredictionArgs> = { - command: ['predict', 'p'], - describe: 'Predict NMR spectrum from mol text', - builder: (yargs: Argv<{}>): Argv => { - return yargs.options({ - ...nmrOptions, - ...predictionOptions, - }) as Argv - }, - handler: async argv => { - await predictNMR(argv) - }, -} diff --git a/app/scripts/nmr-cli/src/utilities/adjustAlpha.ts b/app/scripts/nmr-cli/src/utilities/adjustAlpha.ts new file mode 100644 index 0000000..9223381 --- /dev/null +++ b/app/scripts/nmr-cli/src/utilities/adjustAlpha.ts @@ -0,0 +1,10 @@ +function percentToHex(p: number): string { + const percent = Math.max(0, Math.min(100, p)); + const intValue = Math.round((percent / 100) * 255); + const hexValue = intValue.toString(16); + return percent === 100 ? 
'' : hexValue.padStart(2, '0'); +} + +export function adjustAlpha(color: string, factor: number): string { + return color + percentToHex(factor); +} \ No newline at end of file diff --git a/app/scripts/nmr-cli/src/utilities/isProton.ts b/app/scripts/nmr-cli/src/utilities/isProton.ts new file mode 100644 index 0000000..ad8c5e9 --- /dev/null +++ b/app/scripts/nmr-cli/src/utilities/isProton.ts @@ -0,0 +1,3 @@ +export function isProton(nucleus: string) { + return nucleus === '1H'; +} diff --git a/docker-compose.yml b/docker-compose.yml index baabbd4..4f62bc9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,12 +28,14 @@ services: env_file: - ./.env nmr-load-save: - #build: ./app/scripts/nmr-cli + # build: ./app/scripts/nmr-cli image: nfdi4chem/nmr-cli:dev-latest entrypoint: /bin/sh stdin_open: true tty: true container_name: nmr-converter + volumes: + - shared-data:/shared nmr-respredict: #build: ./app/scripts/nmr-respredict image: nfdi4chem/nmr-respredict:dev-latest diff --git a/env.template b/env.template index 6a4dca7..548cdb8 100644 --- a/env.template +++ b/env.template @@ -1,4 +1,4 @@ -POSTGRES_USER=sail +POSTGRES_USER=user POSTGRES_PASSWORD=password POSTGRES_SERVER=pgsql POSTGRES_PORT=5432 diff --git a/ops/deploy-script.sh b/ops/deploy-script.sh new file mode 100644 index 0000000..2fc7a02 --- /dev/null +++ b/ops/deploy-script.sh @@ -0,0 +1,87 @@ + #!/bin/bash + + # Define variables + PROJECT_DIR="/mnt/data/nmrkit" + COMPOSE_FILE="docker-compose-dev.yml" + NMRKIT_IMAGE="nfdi4chem/nmrkit:dev-latest" + NMR_CLI_IMAGE="nfdi4chem/nmr-cli:dev-latest" + LOG_FILE="/var/log/nmrkit-deploy.log" + LOG_OWNER="${SUDO_USER:-$(whoami)}" + + # Create log file if it doesn't exist + if [ ! 
-f "$LOG_FILE" ]; then + sudo touch "$LOG_FILE" + sudo chmod 644 "$LOG_FILE" + sudo chown "$LOG_OWNER":"$LOG_OWNER" "$LOG_FILE" + fi + + # Unified logging function + log_message() { + echo "$1" + echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE" + } + + # === Start of script === + log_message "🚀 ==========================================" + log_message "🚀 Starting NMRKit Deployment Script" + log_message "🚀 ==========================================" + + # Change to project directory to ensure paths resolve correctly + cd "$PROJECT_DIR/ops" || { + log_message "❌ Failed to change to directory $PROJECT_DIR/ops" + exit 1 + } + log_message "📂 Working directory: $(pwd)" + + # === Functions === + + # Cleanup function + cleanup() { + log_message "🧹 Cleaning up dangling images..." + docker image prune -f >/dev/null 2>&1 || true + log_message "✅ Cleanup completed" + } + + # Deploy a service by pulling latest image and recreating container if updated + deploy_service() { + local service_name=$1 + local image=$2 + + log_message "📦 Starting deployment for service: $service_name" + log_message "🔍 Checking for new image: $image" + + # Pull the latest image + if [ "$(docker pull "$image" | grep -c "Status: Image is up to date")" -eq 0 ]; then + log_message "✨ New image detected for $service_name" + log_message "🚀 Recreating container with updated image..." + docker compose -f "$COMPOSE_FILE" up -d --force-recreate --no-deps "$service_name" + log_message "✅ Deployment of $service_name completed successfully" + else + log_message "✅ Image for $service_name is up to date. Skipping deployment." 
+ fi + } + + # Main deployment process + main() { + log_message "────────────────────────────────────────" + log_message "🔄 Deploying NMRKit API Service" + log_message "────────────────────────────────────────" + deploy_service "nmrkit-api" "$NMRKIT_IMAGE" + + log_message "" + log_message "────────────────────────────────────────" + log_message "🔄 Deploying NMR-Load-Save Service" + log_message "────────────────────────────────────────" + deploy_service "nmr-converter" "$NMR_CLI_IMAGE" + + log_message "" + cleanup + + log_message "" + log_message "🎉 ==========================================" + log_message "🎉 All Deployments Completed Successfully!" + log_message "🎉 ==========================================" + } + + # Execute main deployment + main diff --git a/ops/docker-compose-dev.yml b/ops/docker-compose-dev.yml index 5508f04..a7199f5 100644 --- a/ops/docker-compose-dev.yml +++ b/ops/docker-compose-dev.yml @@ -1,5 +1,3 @@ -version: "3.8" - services: traefik: image: traefik:v2.10 @@ -13,9 +11,9 @@ services: - 80:80 # - 8080:8080 # Optional: Expose Traefik dashboard on port 8080 volumes: - - /var/run/docker.sock:/var/run/docker.sock - - web: + - "/var/run/docker.sock:/var/run/docker.sock:ro" + nmrkit-api: + container_name: nmrkit-api image: nfdi4chem/nmrkit:dev-latest pull_policy: always labels: @@ -23,110 +21,42 @@ services: - "traefik.http.routers.web.rule=Host(`dev.nmrkit.nmrxiv.org`)" - "traefik.http.routers.web.entrypoints=web" - "traefik.http.services.web.loadbalancer.server.port=80" + volumes: + # - ../app:/code/app + - "/var/run/docker.sock:/var/run/docker.sock" + - shared-data:/shared healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:80/latest/registration/health"] - interval: 1m + test: + [ + "CMD", + "curl", + "-f", + "http://localhost:80/latest/registration/health" + ] + interval: 1m30s timeout: 10s - retries: 10 - start_period: 40s + retries: 20 + start_period: 60s env_file: - - ./.env - - nmr-load-save: + - ../.env + nmr-converter: + 
#build: ./app/scripts/nmr-cli + container_name: nmr-converter image: nfdi4chem/nmr-cli:dev-latest entrypoint: /bin/sh stdin_open: true tty: true - container_name: nmr-converter nmr-respredict: + #build: ./app/scripts/nmr-respredict + container_name: nmrkit-respredict image: nfdi4chem/nmr-respredict:dev-latest entrypoint: /bin/sh stdin_open: true tty: true - container_name: nmr-respredict volumes: - shared-data:/shared - - prometheus: - image: prom/prometheus - container_name: nmrkit_prometheus - ports: - - 9090:9090 - volumes: - - ./../prometheus_data/prometheus.yml:/etc/prometheus/prometheus.yml - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - grafana: - image: grafana/grafana - container_name: nmrkit_grafana - ports: - - 3000:3000 - volumes: - - /mnt/data/grafana_data:/var/lib/grafana - - redis: - image: "redis:alpine" - ports: - - "${FORWARD_REDIS_PORT:-6379}:6379" - volumes: - - "/mnt/data/redis:/data" - networks: - - default - healthcheck: - test: ["CMD", "redis-cli", "ping"] - retries: 3 - timeout: 5s - - pgsql: - image: "informaticsmatters/rdkit-cartridge-debian" - ports: - - "${FORWARD_DB_PORT:-5432}:5432" - env_file: - - ./.env - volumes: - - "/mnt/data/pgsql:/var/lib/postgresql/data" - networks: - - default - healthcheck: - test: - [ - "CMD", - "pg_isready", - "-q", - "-d", - "${POSTGRES_DB}", - "-U", - "${POSTGRES_USER}", - ] - retries: 3 - timeout: 5s - minio: - image: 'minio/minio:latest' - ports: - - '${FORWARD_MINIO_PORT:-9001}:9001' - - '${FORWARD_MINIO_CONSOLE_PORT:-8900}:8900' - environment: - - ./.env - volumes: - - /mnt/data/minio:/data/minio - networks: - - default - command: minio server /data/minio --console-address ":8900" - volumes: - prometheus_data: - driver: local - driver_opts: - o: bind - type: none - device: /mnt/data/prometheus_data - grafana_data: - driver: local - driver_opts: - o: bind - type: none - device: /mnt/data/grafana_data + shared-data: networks: default: name: nmrkit_vpc diff --git 
a/ops/docker-compose-prod.yml b/ops/docker-compose-prod.yml index a5c1bd3..c5d6838 100644 --- a/ops/docker-compose-prod.yml +++ b/ops/docker-compose-prod.yml @@ -1,5 +1,3 @@ -version: "3.8" - services: traefik: image: traefik:v2.10 @@ -13,105 +11,52 @@ services: - 80:80 # - 8080:8080 # Optional: Expose Traefik dashboard on port 8080 volumes: - - /var/run/docker.sock:/var/run/docker.sock - - web: - image: nfdi4chem/nmrkit:v0.1.0 + - "/var/run/docker.sock:/var/run/docker.sock:ro" + nmrkit-api: + container_name: nmrkit-api + image: nfdi4chem/nmrkit:latest pull_policy: always labels: - "traefik.enable=true" - - "traefik.http.routers.web.rule=Host(`dev.nmrkit.nmrxiv.org`)" + - "traefik.http.routers.web.rule=Host(`nmrkit.nmrxiv.org`)" - "traefik.http.routers.web.entrypoints=web" - "traefik.http.services.web.loadbalancer.server.port=80" - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:80/latest/registration/health"] - interval: 1m - timeout: 10s - retries: 10 - start_period: 40s - env_file: - - ./.env - - prometheus: - image: prom/prometheus - container_name: nmrkit_prometheus - ports: - - 9090:9090 - volumes: - - ./../prometheus_data/prometheus.yml:/etc/prometheus/prometheus.yml - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - grafana: - image: grafana/grafana - container_name: nmrkit_grafana - ports: - - 3000:3000 - volumes: - - /mnt/data/grafana_data:/var/lib/grafana - - redis: - image: "redis:alpine" - ports: - - "${FORWARD_REDIS_PORT:-6379}:6379" - volumes: - - "/mnt/data/redis:/data" - networks: - - default - healthcheck: - test: ["CMD", "redis-cli", "ping"] - retries: 3 - timeout: 5s - - pgsql: - image: "informaticsmatters/rdkit-cartridge-debian" - ports: - - "${FORWARD_DB_PORT:-5432}:5432" - env_file: - - ./.env volumes: - - "/mnt/data/pgsql:/var/lib/postgresql/data" - networks: - - default + # - ../app:/code/app + - "/var/run/docker.sock:/var/run/docker.sock" + - shared-data:/shared healthcheck: test: [ - "CMD", - "pg_isready", - 
"-q", - "-d", - "${POSTGRES_DB}", - "-U", - "${POSTGRES_USER}", + "CMD", + "curl", + "-f", + "http://localhost:80/latest/registration/health" ] - retries: 3 - timeout: 5s - minio: - image: 'minio/minio:latest' - ports: - - '${FORWARD_MINIO_PORT:-9001}:9001' - - '${FORWARD_MINIO_CONSOLE_PORT:-8900}:8900' - environment: - - ./.env + interval: 1m30s + timeout: 10s + retries: 20 + start_period: 60s + env_file: + - ../.env + nmr-converter: + #build: ./app/scripts/nmr-cli + container_name: nmr-converter + image: nfdi4chem/nmr-cli:latest + entrypoint: /bin/sh + stdin_open: true + tty: true + nmr-respredict: + #build: ./app/scripts/nmr-respredict + container_name: nmrkit-respredict + image: nfdi4chem/nmr-respredict:latest + entrypoint: /bin/sh + stdin_open: true + tty: true volumes: - - /mnt/data/minio:/data/minio - networks: - - default - command: minio server /data/minio --console-address ":8900" - + - shared-data:/shared volumes: - prometheus_data: - driver: local - driver_opts: - o: bind - type: none - device: /mnt/data/prometheus_data - grafana_data: - driver: local - driver_opts: - o: bind - type: none - device: /mnt/data/grafana_data + shared-data: networks: default: name: nmrkit_vpc diff --git a/ops/zero-downtime-deployment-script.sh b/ops/zero-downtime-deployment-script.sh index 0f91cb9..2fc7a02 100644 --- a/ops/zero-downtime-deployment-script.sh +++ b/ops/zero-downtime-deployment-script.sh @@ -1,87 +1,87 @@ -#!/bin/bash - -# Define variables -COMPOSE_FILE="/mnt/data/nmrkit/ops/docker-compose-dev.yml" -DOCKER_REPO_NAME="nfdi4chem/nmrkit:dev-latest" -IMAGE_NAME="nfdi4chem/nmrkit:dev-latest" -NEW_CONTAINER_ID="" -IS_CONTAINER_HEALTHY=1 - -# Function to check the health of the container -check_health() { - - HEALTH=$(docker inspect --format='{{json .State.Health.Status}}' $NEW_CONTAINER_ID) - - if [[ $HEALTH == *"healthy"* ]]; then - echo "Container is healthy." 
- return 0 - else - echo "Container is unhealthy or still starting" - return 1 + #!/bin/bash + + # Define variables + PROJECT_DIR="/mnt/data/nmrkit" + COMPOSE_FILE="docker-compose-dev.yml" + NMRKIT_IMAGE="nfdi4chem/nmrkit:dev-latest" + NMR_CLI_IMAGE="nfdi4chem/nmr-cli:dev-latest" + LOG_FILE="/var/log/nmrkit-deploy.log" + LOG_OWNER="${SUDO_USER:-$(whoami)}" + + # Create log file if it doesn't exist + if [ ! -f "$LOG_FILE" ]; then + sudo touch "$LOG_FILE" + sudo chmod 644 "$LOG_FILE" + sudo chown "$LOG_OWNER":"$LOG_OWNER" "$LOG_FILE" fi -} - -# Check if there is a new image available in the Docker repository -if [ "$(docker pull $DOCKER_REPO_NAME | grep "Status: Image is up to date" | wc -l)" -eq 0 ]; then - - # Scale up a new container - echo "Scale up new container.." - docker-compose -f $COMPOSE_FILE up -d --scale web=2 --no-recreate - - NEW_CONTAINER_ID=$(docker ps -q -l) - - echo "New Container Id is.." - echo "$NEW_CONTAINER_ID" - - # Wait for new containers to start and health checks to pass - echo "Waiting for the new containers to start and health check to pass retry 5 times.." - n=0; - while [ $n -le 10 ] - do - if ! check_health; then - n=$(( $n + 1 )) - sleep 1m - echo "Container not healthy.. Check again.." 
- else - IS_CONTAINER_HEALTHY=0 - break - fi - done - - # Remove old containers and images - if [ $IS_CONTAINER_HEALTHY == 0 ] ; then - - # Set the desired container name prefix - CONTAINER_NAME_PREFIX="ops_web" - - # Retrieve the container IDs that match the prefix - container_ids=$(docker ps -a --filter "name=^/${CONTAINER_NAME_PREFIX}" --format "{{.ID}}") - - # Sort the container IDs by creation date in ascending order - sorted_container_ids=$(echo "$container_ids" | xargs docker inspect --format='{{.Created}} {{.ID}}' | sort | awk '{print $2}') - - # Get the oldest container ID - oldest_container_id=$(echo "$sorted_container_ids" | head -n 1) - - # Check if any container IDs were found - if [[ -z "$oldest_container_id" ]]; then - echo "No containers found with the name prefix '${CONTAINER_NAME_PREFIX}'." - exit 1 + # Unified logging function + log_message() { + echo "$1" + echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" >> "$LOG_FILE" + } + + # === Start of script === + log_message "🚀 ==========================================" + log_message "🚀 Starting NMRKit Deployment Script" + log_message "🚀 ==========================================" + + # Change to project directory to ensure paths resolve correctly + cd "$PROJECT_DIR/ops" || { + log_message "❌ Failed to change to directory $PROJECT_DIR/ops" + exit 1 + } + log_message "📂 Working directory: $(pwd)" + + # === Functions === + + # Cleanup function + cleanup() { + log_message "🧹 Cleaning up dangling images..." 
+ docker image prune -f >/dev/null 2>&1 || true + log_message "✅ Cleanup completed" + } + + # Deploy a service by pulling latest image and recreating container if updated + deploy_service() { + local service_name=$1 + local image=$2 + + log_message "📦 Starting deployment for service: $service_name" + log_message "🔍 Checking for new image: $image" + + # Pull the latest image + if [ "$(docker pull "$image" | grep -c "Status: Image is up to date")" -eq 0 ]; then + log_message "✨ New image detected for $service_name" + log_message "🚀 Recreating container with updated image..." + docker compose -f "$COMPOSE_FILE" up -d --force-recreate --no-deps "$service_name" + log_message "✅ Deployment of $service_name completed successfully" + else + log_message "✅ Image for $service_name is up to date. Skipping deployment." fi - - # Delete the old container and unused images - docker stop $oldest_container_id - docker rm $oldest_container_id - docker image prune -af - echo "Deleted the oldest container with ID: ${oldest_container_id}" - - else - echo "Couldnot complete the deployment as the container is unhealthy.." - docker stop $NEW_CONTAINER_ID - docker rm $NEW_CONTAINER_ID - fi - -else - echo "Skipping deployment as no new image available.." -fi + } + + # Main deployment process + main() { + log_message "────────────────────────────────────────" + log_message "🔄 Deploying NMRKit API Service" + log_message "────────────────────────────────────────" + deploy_service "nmrkit-api" "$NMRKIT_IMAGE" + + log_message "" + log_message "────────────────────────────────────────" + log_message "🔄 Deploying NMR-Load-Save Service" + log_message "────────────────────────────────────────" + deploy_service "nmr-converter" "$NMR_CLI_IMAGE" + + log_message "" + cleanup + + log_message "" + log_message "🎉 ==========================================" + log_message "🎉 All Deployments Completed Successfully!" 
+ log_message "🎉 ==========================================" + } + + # Execute main deployment + main diff --git a/package-lock.json b/package-lock.json index 9d5820d..e581cdc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -120,6 +120,7 @@ "version": "4.19.1", "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.19.1.tgz", "integrity": "sha512-mBecfMFS4N+yK/p0ZbK53vrZbL6OtWMk8YmnOv1i0LXx4pelY8TFhqKoTit3NPVPwoSNN0vdSN9dTu1xr1XOVw==", + "peer": true, "dependencies": { "@algolia/client-common": "4.19.1", "@algolia/requester-common": "4.19.1", @@ -991,6 +992,7 @@ "version": "4.19.1", "resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-4.19.1.tgz", "integrity": "sha512-IJF5b93b2MgAzcE/tuzW0yOPnuUyRgGAtaPv5UUywXM8kzqfdwZTO4sPJBzoGz1eOy6H9uEchsJsBFTELZSu+g==", + "peer": true, "dependencies": { "@algolia/cache-browser-local-storage": "4.19.1", "@algolia/cache-common": "4.19.1", @@ -1123,6 +1125,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001517", "electron-to-chromium": "^1.4.477", @@ -1342,6 +1345,7 @@ "version": "7.5.2", "resolved": "https://registry.npmjs.org/focus-trap/-/focus-trap-7.5.2.tgz", "integrity": "sha512-p6vGNNWLDGwJCiEjkSK6oERj/hEyI9ITsSwIUICBoKLlWiTWXJRfQibCwcoi50rTZdbi87qDtUlMCmQwsGSgPw==", + "peer": true, "dependencies": { "tabbable": "^6.2.0" } @@ -1713,6 +1717,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "nanoid": "^3.3.6", "picocolors": "^1.0.0", @@ -2009,6 +2014,7 @@ "version": "3.3.3", "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.3.3.tgz", "integrity": "sha512-A0KgSkef7eE4Mf+nKJ83i75TMyq8HqY3qmFIJSWy8bNt0v1lG7jUcpGpoTFxAwYcWOphcTBLPPJg+bDfhDf52w==", + "peer": true, "dependencies": { "@alloc/quick-lru": "^5.2.0", "arg": "^5.0.2", @@ -2126,6 +2132,7 @@ "version": "4.4.8", "resolved": "https://registry.npmjs.org/vite/-/vite-4.4.8.tgz", "integrity": 
"sha512-LONawOUUjxQridNWGQlNizfKH89qPigK36XhMI7COMGztz8KNY0JHim7/xDd71CZwGT4HtSRgI7Hy+RlhG0Gvg==", + "peer": true, "dependencies": { "esbuild": "^0.18.10", "postcss": "^8.4.26", @@ -2180,6 +2187,7 @@ "version": "1.0.0-beta.7", "resolved": "https://registry.npmjs.org/vitepress/-/vitepress-1.0.0-beta.7.tgz", "integrity": "sha512-P9Rw+FXatKIU4fVdtKxqwHl6fby8E/8zE3FIfep6meNgN4BxbWqoKJ6yfuuQQR9IrpQqwnyaBh4LSabyll6tWg==", + "peer": true, "dependencies": { "@docsearch/css": "^3.5.1", "@docsearch/js": "^3.5.1", @@ -2213,6 +2221,7 @@ "version": "3.3.4", "resolved": "https://registry.npmjs.org/vue/-/vue-3.3.4.tgz", "integrity": "sha512-VTyEYn3yvIeY1Py0WaYGZsXnz3y5UnGi62GjVEqvEGPl6nxbOrCXbVOTQWBEJUqAyTUk2uJ5JLVnYJ6ZzGbrSw==", + "peer": true, "dependencies": { "@vue/compiler-dom": "3.3.4", "@vue/compiler-sfc": "3.3.4", @@ -2332,6 +2341,7 @@ "version": "4.19.1", "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-4.19.1.tgz", "integrity": "sha512-mBecfMFS4N+yK/p0ZbK53vrZbL6OtWMk8YmnOv1i0LXx4pelY8TFhqKoTit3NPVPwoSNN0vdSN9dTu1xr1XOVw==", + "peer": true, "requires": { "@algolia/client-common": "4.19.1", "@algolia/requester-common": "4.19.1", @@ -2824,6 +2834,7 @@ "version": "4.19.1", "resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-4.19.1.tgz", "integrity": "sha512-IJF5b93b2MgAzcE/tuzW0yOPnuUyRgGAtaPv5UUywXM8kzqfdwZTO4sPJBzoGz1eOy6H9uEchsJsBFTELZSu+g==", + "peer": true, "requires": { "@algolia/cache-browser-local-storage": "4.19.1", "@algolia/cache-common": "4.19.1", @@ -2914,6 +2925,7 @@ "version": "4.21.10", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.21.10.tgz", "integrity": "sha512-bipEBdZfVH5/pwrvqc+Ub0kUPVfGUhlKxbvfD+z1BDnPEO/X98ruXGA1WP5ASpAFKan7Qr6j736IacbZQuAlKQ==", + "peer": true, "requires": { "caniuse-lite": "^1.0.30001517", "electron-to-chromium": "^1.4.477", @@ -3072,6 +3084,7 @@ "version": "7.5.2", "resolved": "https://registry.npmjs.org/focus-trap/-/focus-trap-7.5.2.tgz", "integrity": 
"sha512-p6vGNNWLDGwJCiEjkSK6oERj/hEyI9ITsSwIUICBoKLlWiTWXJRfQibCwcoi50rTZdbi87qDtUlMCmQwsGSgPw==", + "peer": true, "requires": { "tabbable": "^6.2.0" } @@ -3331,6 +3344,7 @@ "version": "8.4.27", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.27.tgz", "integrity": "sha512-gY/ACJtJPSmUFPDCHtX78+01fHa64FaU4zaaWfuh1MhGJISufJAH4cun6k/8fwsHYeK4UQmENQK+tRLCFJE8JQ==", + "peer": true, "requires": { "nanoid": "^3.3.6", "picocolors": "^1.0.0", @@ -3504,6 +3518,7 @@ "version": "3.3.3", "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.3.3.tgz", "integrity": "sha512-A0KgSkef7eE4Mf+nKJ83i75TMyq8HqY3qmFIJSWy8bNt0v1lG7jUcpGpoTFxAwYcWOphcTBLPPJg+bDfhDf52w==", + "peer": true, "requires": { "@alloc/quick-lru": "^5.2.0", "arg": "^5.0.2", @@ -3587,6 +3602,7 @@ "version": "4.4.8", "resolved": "https://registry.npmjs.org/vite/-/vite-4.4.8.tgz", "integrity": "sha512-LONawOUUjxQridNWGQlNizfKH89qPigK36XhMI7COMGztz8KNY0JHim7/xDd71CZwGT4HtSRgI7Hy+RlhG0Gvg==", + "peer": true, "requires": { "esbuild": "^0.18.10", "fsevents": "~2.3.2", @@ -3598,6 +3614,7 @@ "version": "1.0.0-beta.7", "resolved": "https://registry.npmjs.org/vitepress/-/vitepress-1.0.0-beta.7.tgz", "integrity": "sha512-P9Rw+FXatKIU4fVdtKxqwHl6fby8E/8zE3FIfep6meNgN4BxbWqoKJ6yfuuQQR9IrpQqwnyaBh4LSabyll6tWg==", + "peer": true, "requires": { "@docsearch/css": "^3.5.1", "@docsearch/js": "^3.5.1", @@ -3628,6 +3645,7 @@ "version": "3.3.4", "resolved": "https://registry.npmjs.org/vue/-/vue-3.3.4.tgz", "integrity": "sha512-VTyEYn3yvIeY1Py0WaYGZsXnz3y5UnGi62GjVEqvEGPl6nxbOrCXbVOTQWBEJUqAyTUk2uJ5JLVnYJ6ZzGbrSw==", + "peer": true, "requires": { "@vue/compiler-dom": "3.3.4", "@vue/compiler-sfc": "3.3.4",