From e149d83919147753483bdf96638f6c71c79e7920 Mon Sep 17 00:00:00 2001
From: Venkata Nainala <mailcs76@gmail.com>
Date: Tue, 17 Feb 2026 15:37:05 +0100
Subject: [PATCH 1/2] docs: improve OpenAPI descriptions, examples, and
 response codes

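Add detailed descriptions, request/response examples, and proper HTTP
response codes across all routers. Add OpenAPI tag metadata, the API
version, and a rich API description to the FastAPI app configuration.

Note: this also removes the ALATIS-backed POST /chem/label-atoms endpoint
and its now-unused imports from app/routers/chem.py.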
---
 app/main.py                 |  58 ++++++++++-
 app/routers/chem.py         | 121 +++++++++++++--------
 app/routers/converter.py    |  41 ++++++--
 app/routers/predict.py      |  93 +++++++++++++----
 app/routers/registration.py | 203 ++++++++++++++++++++++++++++++------
 5 files changed, 411 insertions(+), 105 deletions(-)

diff --git a/app/main.py b/app/main.py
index 9160d18..b9fdb6f 100644
--- a/app/main.py
+++ b/app/main.py
@@ -13,9 +13,62 @@
 from prometheus_fastapi_instrumentator import Instrumentator
 from app.schemas import HealthCheck
 
+DESCRIPTION = """
+## NMR Kit API
+
+A Python-based microservice for **storing**, **parsing**, **converting**, and
+**predicting** NMR (Nuclear Magnetic Resonance) spectra.
+
+### Modules
+
+| Module | Description |
+|--------|-------------|
+| **Chemistry** | Generate HOSE codes for molecules |
+| **Spectra** | Parse NMR spectra from files or URLs |
+| **Converter** | Convert NMR raw data to NMRium JSON |
+| **Predict** | Predict NMR spectra using nmrdb.org or nmrshift engines |
+| **Registration** | Register and query molecules via lwreg |
+
+### Links
+
+* [Documentation](https://nfdi4chem.github.io/nmrkit)
+* [Source Code](https://github.com/NFDI4Chem/nmrkit)
+"""
+
+tags_metadata = [
+    {
+        "name": "healthcheck",
+        "description": "Health check endpoints to verify service availability.",
+    },
+    {
+        "name": "chem",
+        "description": "Chemistry operations such as HOSE code generation.",
+    },
+    {
+        "name": "spectra",
+        "description": "Parse NMR spectra from uploaded files or remote URLs.",
+    },
+    {
+        "name": "converter",
+        "description": "Convert NMR raw data into NMRium-compatible JSON format.",
+    },
+    {
+        "name": "predict",
+        "description": (
+            "Predict NMR spectra from molecular structures using "
+            "**nmrdb.org** or **nmrshift** prediction engines."
+        ),
+    },
+    {
+        "name": "registration",
+        "description": "Register, query, and retrieve molecules using the lwreg registration system.",
+    },
+]
+
 app = FastAPI(
     title=config.PROJECT_NAME,
-    description="Python-based microservice to store and predict spectra.",
+    version=config.VERSION,
+    description=DESCRIPTION,
     terms_of_service="https://nfdi4chem.github.io/nmrkit",
     contact={
         "name": "Steinbeck Lab",
@@ -26,6 +79,7 @@
         "name": "CC BY 4.0",
         "url": "https://creativecommons.org/licenses/by/4.0/",
     },
+    openapi_tags=tags_metadata,
 )
 
 app.include_router(registration.router)
@@ -42,6 +96,7 @@
     version_format="{major}",
     prefix_format="/v{major}",
     enable_latest=True,
+    description=DESCRIPTION,
     terms_of_service="https://nfdi4chem.github.io/nmrkit",
     contact={
         "name": "Steinbeck Lab",
@@ -52,6 +107,7 @@
         "name": "CC BY 4.0",
         "url": "https://creativecommons.org/licenses/by/4.0/",
     },
+    openapi_tags=tags_metadata,
 )
 
 Instrumentator().instrument(app).expose(app)
diff --git a/app/routers/chem.py b/app/routers/chem.py
index 5a4f803..d7c8c00 100644
--- a/app/routers/chem.py
+++ b/app/routers/chem.py
@@ -1,11 +1,9 @@
 from typing import Annotated
 from psycopg2.errors import UniqueViolation
 from app.modules.cdkmodules import getCDKHOSECodes
-from fastapi import APIRouter, HTTPException, status, Query, Body
+from fastapi import APIRouter, HTTPException, status, Query
 from app.modules.rdkitmodules import getRDKitHOSECodes
 from app.schemas import HealthCheck
-from app.schemas.alatis import AlatisModel
-import requests
 
 router = APIRouter(
     prefix="/chem",
@@ -41,23 +39,91 @@ def get_health() -> HealthCheck:
 @router.get(
     "/hosecode",
     tags=["chem"],
-    summary="Generates HOSE codes of molecule",
+    summary="Generate HOSE codes for a molecule",
+    description=(
+        "Generate **Hierarchically Ordered Spherical Environment (HOSE)** codes "
+        "for every atom in the given molecule. HOSE codes encode the local chemical "
+        "environment around each atom up to a configurable number of spheres.\n\n"
+        "Supports two cheminformatics frameworks:\n"
+        "- **CDK** (Chemistry Development Kit) — default, supports stereo\n"
+        "- **RDKit** — alternative implementation"
+    ),
     response_model=list[str],
-    response_description="Returns an array of hose codes generated",
+    response_description="Array of HOSE code strings, one per atom in the molecule",
     status_code=status.HTTP_200_OK,
+    responses={
+        200: {
+            "description": "Successfully generated HOSE codes",
+            "content": {
+                "application/json": {
+                    "example": [
+                        "C(CC,CC,&)",
+                        "C(CC,C&,&)",
+                        "C(CC,CC,&)",
+                        "C(CCC,CC&,&)",
+                        "C(CC,CC,CC)",
+                        "C(CC,CC,CC)",
+                    ]
+                }
+            },
+        },
+        409: {"description": "Molecule already exists (unique constraint violation)"},
+        422: {"description": "Error parsing the molecular structure"},
+    },
 )
 async def HOSE_Codes(
-    smiles: Annotated[str, Query(examples=["CCCC1CC1"])],
-    framework: Annotated[str, Query(enum=["cdk", "rdkit"])] = "cdk",
-    spheres: Annotated[int, Query()] = 3,
-    usestereo: Annotated[bool, Query()] = False,
+    smiles: Annotated[
+        str,
+        Query(
+            description="SMILES string representing the molecular structure",
+            example="CCCC1CC1",
+            examples=[
+                "CCCC1CC1",
+                "c1ccccc1",
+                "CC(=O)O",
+                "CCO",
+                "C1CCCCC1",
+                "CC(=O)Oc1ccccc1C(=O)O",
+            ],
+        ),
+    ],
+    framework: Annotated[
+        str,
+        Query(
+            enum=["cdk", "rdkit"],
+            description="Cheminformatics framework to use for HOSE code generation",
+        ),
+    ] = "cdk",
+    spheres: Annotated[
+        int,
+        Query(
+            description="Number of spheres (bond distance) to consider around each atom",
+            ge=1,
+            le=10,
+        ),
+    ] = 3,
+    usestereo: Annotated[
+        bool,
+        Query(
+            description="Whether to include stereochemistry information in HOSE codes (CDK only)",
+        ),
+    ] = False,
 ) -> list[str]:
     """
-    ## Generates HOSE codes for a given molecule
-    Endpoint to generate HOSE codes based on each atom in the given molecule.
+    ## Generate HOSE codes for a given molecule
 
-    Returns:
-        HOSE Codes: An array of hose codes generated
+    Generates HOSE (Hierarchically Ordered Spherical Environment) codes based on
+    each atom in the given molecule. These codes are widely used in NMR chemical
+    shift prediction.
+
+    ### Parameters
+    - **smiles**: A valid SMILES string (e.g. `CCCC1CC1`)
+    - **framework**: Choose `cdk` (default) or `rdkit`
+    - **spheres**: Number of bond spheres to encode (default: 3)
+    - **usestereo**: Include stereochemistry in codes (CDK only, default: false)
+
+    ### Returns
+    An array of HOSE code strings, one for each atom in the molecule.
     """
     try:
         if framework == "cdk":
@@ -78,32 +144,3 @@ async def HOSE_Codes(
             detail="Error parsing the structure " + e.message,
             headers={"X-Error": "RDKit molecule input parse error"},
         )
-
-
-@router.post(
-    "/label-atoms",
-    tags=["chem"],
-    summary="Label atoms using ALATIS naming system",
-    response_model=AlatisModel,
-    response_description="",
-    status_code=status.HTTP_200_OK,
-)
-async def label_atoms(data: Annotated[str, Body(embed=False, media_type="text/plain")]):
-    """
-    ## Generates atom labels for a given molecule
-
-    Returns:
-        JSON with various representations
-    """
-    try:
-        url = "http://alatis.nmrfam.wisc.edu/upload"
-        payload = {"input_text": data, "format": "format_", "response_type": "json"}
-        response = requests.request("POST", url, data=payload)
-        response.raise_for_status()  # Raise an error for bad status codes
-        return response.json()
-    except Exception as e:
-        raise HTTPException(
-            status_code=422,
-            detail=f"Error parsing the structure: {str(e)}",
-            headers={"X-Error": "RDKit molecule input parse error"},
-        )
diff --git a/app/routers/converter.py b/app/routers/converter.py
index 23cd4dd..bae9e78 100644
--- a/app/routers/converter.py
+++ b/app/routers/converter.py
@@ -1,5 +1,5 @@
 import subprocess
-from fastapi import APIRouter, HTTPException, status, Response
+from fastapi import APIRouter, HTTPException, status, Response, Query
 from app.schemas import HealthCheck
 
 router = APIRouter(
@@ -36,17 +36,42 @@ def get_health() -> HealthCheck:
 @router.get(
     "/spectra",
     tags=["converter"],
-    summary="Load and convert NMR raw data",
-    # response_model=List[int],
-    response_description="Load and convert NMR raw data",
+    summary="Convert NMR raw data to NMRium JSON",
+    description=(
+        "Fetch NMR raw data from a remote URL and convert it into "
+        "[NMRium](https://www.nmrium.org/)-compatible JSON format. "
+        "The conversion is performed by the **nmr-cli** tool running "
+        "inside a Docker container.\n\n"
+        "Supported input formats include Bruker, JCAMP-DX, and other "
+        "formats recognized by nmr-cli."
+    ),
+    response_description="NMRium-compatible JSON representation of the NMR data",
     status_code=status.HTTP_200_OK,
+    responses={
+        200: {
+            "description": "Successfully converted NMR data to NMRium JSON",
+            "content": {"application/json": {}},
+        },
+        500: {"description": "Conversion failed or Docker container not available"},
+    },
 )
-async def nmr_load_save(url: str):
+async def nmr_load_save(
+    url: str = Query(
+        ...,
+        description="URL pointing to the NMR raw data file to convert",
+        examples=["https://example.com/nmr-data/sample.zip"],
+    ),
+):
     """
-    ## Return nmrium json
+    ## Convert NMR raw data to NMRium JSON
 
-    Returns:
-        Return nmrium json
+    Fetches NMR raw data from the provided URL and converts it into NMRium JSON format.
+
+    ### Parameters
+    - **url**: A publicly accessible URL pointing to the NMR raw data
+
+    ### Returns
+    NMRium-compatible JSON object containing the converted spectra data.
     """
     process = subprocess.Popen(
         ["docker exec nmr-converter nmr-cli -u " + url],
diff --git a/app/routers/predict.py b/app/routers/predict.py
index 2a289db..cf7fa56 100644
--- a/app/routers/predict.py
+++ b/app/routers/predict.py
@@ -15,7 +15,12 @@
     prefix="/predict",
     tags=["predict"],
     dependencies=[],
-    responses={404: {"description": "Not Found"}},
+    responses={
+        404: {"description": "Not Found"},
+        408: {"description": "Prediction timed out"},
+        422: {"description": "Invalid input or NMR CLI error"},
+        500: {"description": "Docker or nmr-converter container not available"},
+    },
 )
 
 # Container name for nmr-cli (from docker-compose.yml)
@@ -468,21 +473,40 @@ def get_health() -> HealthCheck:
 @router.post(
     "/",
     tags=["predict"],
-    summary="Predict NMR spectra from MOL string",
-    response_description="Predicted spectra in NMRium JSON format",
+    summary="Predict NMR spectra from a MOL string",
+    description=(
+        "Submit a molecular structure as a MOL block string and predict NMR spectra "
+        "using one of the supported prediction engines.\n\n"
+        "### Supported Engines\n\n"
+        "| Engine | Spectra Types | Typical Time |\n"
+        "|--------|--------------|-------------|\n"
+        "| **nmrshift** | proton, carbon | ~5-10s |\n"
+        "| **nmrdb.org** | proton, carbon, cosy, hsqc, hmbc | ~30-60s |\n\n"
+        "> **Note:** nmrdb.org predictions can take 30-60 seconds. "
+        "Consider using curl or Postman instead of the Swagger UI for long-running requests."
+    ),
+    response_description="Predicted spectra in NMRium-compatible JSON format",
     status_code=status.HTTP_200_OK,
+    responses={
+        200: {"description": "Successfully predicted NMR spectra"},
+        400: {"description": "Unknown engine type"},
+        408: {"description": "Prediction timed out (nmrdb.org: 300s, nmrshift: 120s)"},
+        422: {"description": "Invalid structure or NMR CLI error"},
+        500: {"description": "Docker or nmr-converter container not available"},
+    },
 )
 async def predict_from_structure(request: PredictRequest):
     """
-    ## Predict NMR spectra from MOL string
+    ## Predict NMR spectra from a MOL string
 
     **Note:** nmrdb.org predictions take 30-60s. Use curl/Postman, not Swagger.
 
-    **Engines:**
-    - **nmrshift** — Supports: proton, carbon
-    - **nmrdb.org** — Supports: proton, carbon, cosy, hsqc, hmbc
+    ### Engines
 
-    **Example (nmrshift):**
+    - **nmrshift** — Supports: `proton`, `carbon`
+    - **nmrdb.org** — Supports: `proton`, `carbon`, `cosy`, `hsqc`, `hmbc`
+
+    ### Example (nmrshift)
     ```json
     {
         "engine": "nmrshift",
@@ -498,7 +522,7 @@ async def predict_from_structure(request: PredictRequest):
     }
     ```
 
-    **Example (nmrdb.org):**
+    ### Example (nmrdb.org)
     ```json
     {
         "engine": "nmrdb.org",
@@ -530,27 +554,52 @@ async def predict_from_structure(request: PredictRequest):
 @router.post(
     "/file",
     tags=["predict"],
-    summary="Predict NMR spectra from uploaded MOL file",
-    response_description="Predicted spectra in NMRium JSON format",
+    summary="Predict NMR spectra from an uploaded MOL file",
+    description=(
+        "Upload a MOL file and predict NMR spectra using one of the supported engines. "
+        "Engine configuration is passed as a JSON string in the `request` form field.\n\n"
+        "### Supported Engines\n\n"
+        "| Engine | Spectra Types | Typical Time |\n"
+        "|--------|--------------|-------------|\n"
+        "| **nmrshift** | proton, carbon | ~5-10s |\n"
+        "| **nmrdb.org** | proton, carbon, cosy, hsqc, hmbc | ~30-60s |\n\n"
+        "> **Note:** nmrdb.org predictions can take 30-60 seconds. "
+        "Consider using curl or Postman instead of the Swagger UI for long-running requests."
+    ),
+    response_description="Predicted spectra in NMRium-compatible JSON format",
     status_code=status.HTTP_200_OK,
+    responses={
+        200: {"description": "Successfully predicted NMR spectra from file"},
+        400: {"description": "Unknown engine type"},
+        408: {"description": "Prediction timed out (nmrdb.org: 300s, nmrshift: 120s)"},
+        422: {"description": "Invalid JSON in request field or NMR CLI error"},
+        500: {"description": "Docker or nmr-converter container not available"},
+    },
 )
 async def predict_from_file(
-    file: UploadFile = File(..., description="MOL file"),
-    request: str = Form(..., description="""JSON string with engine, spectra and options. Examples:
-
-nmrshift: {"engine": "nmrshift", "spectra": ["proton"], "options": {"solvent": "Chloroform-D1 (CDCl3)", "frequency": 400, "nbPoints": 1024, "lineWidth": 1, "peakShape": "lorentzian"}}
-
-nmrdb.org: {"engine": "nmrdb.org", "spectra": ["proton", "carbon"], "options": {"name": "Benzene", "frequency": 400, "1d": {"proton": {"from": -1, "to": 12}, "carbon": {"from": -5, "to": 220}, "nbPoints": 131072, "lineWidth": 1}, "autoExtendRange": true}}
-"""),
+    file: UploadFile = File(..., description="MOL file containing the molecular structure"),
+    request: str = Form(
+        ...,
+        description=(
+            'JSON string with engine, spectra, and options. '
+            'Example (nmrshift): {"engine": "nmrshift", "spectra": ["proton"], '
+            '"options": {"solvent": "Chloroform-D1 (CDCl3)", "frequency": 400, '
+            '"nbPoints": 1024, "lineWidth": 1, "peakShape": "lorentzian"}} — '
+            'Example (nmrdb.org): {"engine": "nmrdb.org", "spectra": ["proton", "carbon"], '
+            '"options": {"name": "Benzene", "frequency": 400, '
+            '"1d": {"proton": {"from": -1, "to": 12}, "carbon": {"from": -5, "to": 220}, '
+            '"nbPoints": 131072, "lineWidth": 1}, "autoExtendRange": true}}'
+        ),
+    ),
 ):
     """
-    ## Predict NMR spectra from uploaded MOL file
+    ## Predict NMR spectra from an uploaded MOL file
 
     Upload a MOL file and pass engine options as a JSON string in the `request` field.
 
-    **Note:** nmrdb.org predictions take 30-60s. Use curl/Postman, not Swagger.
+    > **Note:** nmrdb.org predictions take 30-60s. Use curl/Postman, not Swagger.
 
-    **nmrshift example request field:**
+    ### nmrshift example `request` field
     ```json
     {
         "engine": "nmrshift",
@@ -565,7 +614,7 @@ async def predict_from_file(
     }
     ```
 
-    **nmrdb.org example request field:**
+    ### nmrdb.org example `request` field
     ```json
     {
         "engine": "nmrdb.org",
diff --git a/app/routers/registration.py b/app/routers/registration.py
index cdf27ab..1f92b28 100644
--- a/app/routers/registration.py
+++ b/app/routers/registration.py
@@ -1,6 +1,6 @@
 from typing import List, Annotated, Union
 from app.core.config import LWREG_CONFIG
-from fastapi import APIRouter, HTTPException, Body, status
+from fastapi import APIRouter, HTTPException, Body, Query, status
 from lwreg.utils import (
     initdb,
     bulk_register,
@@ -16,7 +16,10 @@
     prefix="/registration",
     tags=["registration"],
     dependencies=[],
-    responses={404: {"description": "Not Found"}},
+    responses={
+        404: {"description": "Not Found"},
+        500: {"description": "Internal server error"},
+    },
 )
 
 
@@ -34,7 +37,7 @@ def get_health() -> HealthCheck:
     """
     ## Perform a Health Check
     Endpoint to perform a healthcheck on. This endpoint can primarily be used by Docker
-    to ensure a robust container orchestration and management is in place. Other
+    to ensure robust container orchestration and management are in place. Other
     services which rely on the proper functioning of the API service will not deploy if this
     endpoint returns any other HTTP status code except 200 (OK).
     Returns:
@@ -46,20 +49,42 @@ def get_health() -> HealthCheck:
 @router.post(
     "/init",
     tags=["registration"],
-    summary="Initializes the registration database",
-    response_description="Returns boolean indicating the success of the initialisation",
+    summary="Initialize the registration database",
+    description=(
+        "Initialize (or re-initialize) the molecule registration database. "
+        "**Warning:** This operation destroys all existing data in the registration "
+        "database. Set `confirm` to `true` to proceed."
+    ),
+    response_description="Boolean indicating whether initialization was successful",
     status_code=status.HTTP_200_OK,
     response_model=Union[bool, None],
+    responses={
+        200: {
+            "description": "Database initialized successfully",
+            "content": {"application/json": {"example": True}},
+        },
+    },
 )
-async def initialise_database(confirm: Annotated[bool, Body(embed=True)] = False):
+async def initialise_database(
+    confirm: Annotated[
+        bool,
+        Body(
+            embed=True,
+            description="Set to true to confirm database initialization. False returns immediately.",
+        ),
+    ] = False,
+):
     """
-    ## Initializes the registration database
+    ## Initialize the registration database
 
-    NOTE: This call destroys any existing information in the registration database
+    > **WARNING:** This call destroys any existing information in the registration database.
 
-    Arguments:
+    ### Parameters
+    - **confirm**: Must be set to `true` to actually perform the initialization.
+      If `false` (default), the call returns immediately without changes.
 
-    confirm -- if set to False we immediately return
+    ### Returns
+    `true` if initialization was successful, `null` if confirm was `false`.
     """
     return initdb(config=LWREG_CONFIG, confirm=confirm)
 
@@ -67,23 +92,78 @@ async def initialise_database(confirm: Annotated[bool, Body(embed=True)] = False
 @router.post(
     "/register",
     tags=["registration"],
-    summary="Registers new molecules",
-    response_description="Returns the new registry number(s) (molregno). If all entries are duplicates exception is raised",
+    summary="Register new molecules",
+    description=(
+        "Register one or more molecules in the database. Accepts SMILES strings "
+        "(one per line) or an SDF block as plain text. Returns the new registry "
+        "numbers (molregnos) for successfully registered molecules.\n\n"
+        "Duplicate molecules are flagged as `DUPLICATE` and parse failures as "
+        "`PARSE_FAILURE` in the response array."
+    ),
+    response_description="Array of registry numbers (integers) or status strings (DUPLICATE, PARSE_FAILURE)",
     status_code=status.HTTP_200_OK,
     response_model=List[Union[str, int]],
+    responses={
+        200: {
+            "description": "Molecules registered successfully",
+            "content": {
+                "application/json": {
+                    "example": [1, "DUPLICATE", 3],
+                }
+            },
+        },
+        422: {"description": "Registration failed — all entries are duplicates or unparseable"},
+    },
 )
 async def register_compounds(
-    data: Annotated[str, Body(embed=False, media_type="text/plain")] = "CCCC"
+    data: Annotated[
+        str,
+        Body(
+            embed=False,
+            media_type="text/plain",
+            description=(
+                "Molecular data as plain text. Provide either SMILES strings "
+                "(one per line) or an SDF block (containing $$$$ delimiters)."
+            ),
+            openapi_examples={
+                "smiles": {
+                    "summary": "SMILES input",
+                    "description": "One or more SMILES strings, one per line",
+                    "value": "CCCC\nCCCCO\nc1ccccc1",
+                },
+                "sdf": {
+                    "summary": "SDF block",
+                    "description": "An SDF block with $$$$ delimiters",
+                    "value": (
+                        "\n  Mrv2311 08092305412D\n\n"
+                        "  3  2  0  0  0  0            999 V2000\n"
+                        "   -0.4018    0.6926    0.0000 C   0  0  0  0  0  0\n"
+                        "    0.3127    1.1051    0.0000 C   0  0  0  0  0  0\n"
+                        "    1.0272    0.6926    0.0000 O   0  0  0  0  0  0\n"
+                        "  1  2  1  0  0  0  0\n"
+                        "  2  3  1  0  0  0  0\n"
+                        "M  END\n$$$$"
+                    ),
+                },
+            },
+        ),
+    ] = "CCCC",
 ):
     """
-    ## Registers new molecules, assuming it doesn't already exist,
-    and returns the new registry number(s) (molregno). If all entries
-    are duplicates exception is raised
+    ## Register new molecules
+
+    Registers one or more molecules (assuming they don't already exist) and returns
+    the new registry number(s) (molregno).
 
-    #### Only one of the molecule format objects should be provided
+    ### Input Formats
+    - **SMILES** — one SMILES string per line
+    - **SDF block** — MOL/SDF format with `$$$$` delimiters
 
-    molblock   -- MOL or SDF block
-    smiles     -- smiles
+    ### Response
+    An array where each element is either:
+    - An **integer** — the new molregno for a successfully registered molecule
+    - `"DUPLICATE"` — the molecule already exists in the database
+    - `"PARSE_FAILURE"` — the molecule could not be parsed
     """
     try:
         if "$$$$" in data:
@@ -120,17 +200,45 @@ async def register_compounds(
 @router.get(
     "/query",
     tags=["registration"],
-    summary="Queries to see if a molecule has already been registered",
+    summary="Query if a molecule is already registered",
+    description=(
+        "Check whether a molecule (given as a SMILES string) has already been "
+        "registered in the database. Returns the corresponding registry numbers "
+        "(molregnos) if found."
+    ),
     response_model=List[int],
-    response_description="Returns the corresponding registry numbers (molregnos)",
+    response_description="Array of matching registry numbers (molregnos)",
     status_code=status.HTTP_200_OK,
+    responses={
+        200: {
+            "description": "Query completed successfully",
+            "content": {
+                "application/json": {
+                    "example": [42, 108],
+                }
+            },
+        },
+        500: {"description": "Internal server error during query"},
+    },
 )
-async def query_compounds(smi: str):
+async def query_compounds(
+    smi: str = Query(
+        ...,
+        description="SMILES string of the molecule to query",
+        examples=["CCCC", "c1ccccc1", "CCO"],
+    ),
+):
     """
-    ## Queries to see if a molecule has already been registered
+    ## Query if a molecule is already registered
 
-    Returns:
-        Corresponding registry numbers (molregnos)
+    Checks the registration database for the given molecule.
+
+    ### Parameters
+    - **smi**: A valid SMILES string
+
+    ### Returns
+    An array of integer registry numbers (molregnos) matching the query.
+    Returns an empty array if the molecule is not registered.
     """
     try:
         res = query(smiles=smi, config=LWREG_CONFIG)
@@ -142,18 +250,49 @@ async def query_compounds(smi: str):
 @router.post(
     "/retrieve",
     tags=["registration"],
-    summary="Retrieves entries based on the list of IDs provided",
+    summary="Retrieve registered molecules by ID",
+    description=(
+        "Retrieve one or more registered molecules by their registry IDs (molregnos). "
+        "Returns the molecular data and format for each requested ID."
+    ),
     response_model=tuple(),
-    response_description="Returns HTTP Status Code 200 (OK)",
+    response_description="Array of (molregno, data, format) tuples for each requested ID",
     status_code=status.HTTP_200_OK,
+    responses={
+        200: {
+            "description": "Successfully retrieved molecule data",
+            "content": {
+                "application/json": {
+                    "example": [
+                        [1, "CCCC", "smiles"],
+                        [2, "CCO", "smiles"],
+                    ],
+                }
+            },
+        },
+        500: {"description": "Internal server error during retrieval"},
+    },
 )
-async def retrieve_compounds(ids: List[int]):
+async def retrieve_compounds(
+    ids: List[int] = Body(
+        ...,
+        description="List of registry IDs (molregnos) to retrieve",
+        examples=[[1, 2, 3]],
+    ),
+):
     """
-    ## Retrieves entries based on the ids provided
+    ## Retrieve registered molecules by ID
 
-    Returns:
-        Molecule data for one or more registry ids (molregnos).
-        The return value is a tuple of (molregno, data, format) 3-tuples
+    Fetches molecule data for one or more registry IDs (molregnos).
+
+    ### Request Body
+    A JSON array of integer registry IDs.
+
+    ### Returns
+    An array of `[molregno, data, format]` tuples containing:
+    - **molregno** — the registry number
+    - **data** — the molecular data (SMILES, MOL block, etc.)
+    - **format** — the format of the data
     """
     try:
         res = retrieve(ids=ids, config=LWREG_CONFIG)

From 5d6dccb4bc030dc1f58b92c687b629292fa73d75 Mon Sep 17 00:00:00 2001
From: Venkata Nainala <mailcs76@gmail.com>
Date: Tue, 17 Feb 2026 15:37:31 +0100
Subject: [PATCH 2/2] feat(spectra): add parse-publication-string endpoint

Add POST /spectra/parse/publication-string endpoint that resurrects an
NMR spectrum from an ACS-style publication string. The endpoint accepts
the publication string as a plain text body and invokes the nmr-cli
parse-publication-string command via Docker exec.

Uses StreamingResponse with Content-Disposition attachment header to
prevent Swagger UI from hanging on the large spectrum JSON response.
Also improves OpenAPI docs for existing spectra endpoints.
---
 app/routers/spectra.py | 248 +++++++++++++++++++++++++++++++++++------
 1 file changed, 217 insertions(+), 31 deletions(-)

diff --git a/app/routers/spectra.py b/app/routers/spectra.py
index ea2c05d..6563d8c 100644
--- a/app/routers/spectra.py
+++ b/app/routers/spectra.py
@@ -1,4 +1,6 @@
-from fastapi import APIRouter, HTTPException, status, UploadFile, File, Form
+from fastapi import APIRouter, Body, HTTPException, status, UploadFile, File, Form
+from fastapi.responses import StreamingResponse
+import io
 from app.schemas import HealthCheck
 from pydantic import BaseModel, HttpUrl, Field
 import subprocess
@@ -11,7 +13,12 @@
     prefix="/spectra",
     tags=["spectra"],
     dependencies=[],
-    responses={404: {"description": "Not Found"}},
+    responses={
+        404: {"description": "Not Found"},
+        408: {"description": "Processing timeout exceeded"},
+        422: {"description": "Error parsing the spectra input"},
+        500: {"description": "Docker or nmr-converter container not available"},
+    },
 )
 
 # Container name for nmr-cli (from docker-compose.yml)
@@ -19,21 +26,41 @@
 
 
 class UrlParseRequest(BaseModel):
-    """Request model for parsing spectra from URL"""
-    url: HttpUrl = Field(..., description="URL of the spectra file")
+    """Request model for parsing spectra from a remote URL."""
+
+    url: HttpUrl = Field(
+        ...,
+        description="URL pointing to the NMR spectra file to parse",
+        json_schema_extra={
+            "examples": ["https://example.com/spectra/sample.jdx"],
+        },
+    )
     capture_snapshot: bool = Field(
         False,
-        description="Generate an image snapshot of the spectra"
+        description="Generate an image snapshot of the spectra",
     )
     auto_processing: bool = Field(
         False,
-        description="Enable automatic processing of spectrum (FID → FT spectra)"
+        description="Enable automatic processing of spectrum (FID → FT spectra)",
     )
     auto_detection: bool = Field(
         False,
-        description="Enable ranges and zones automatic detection"
+        description="Enable ranges and zones automatic detection",
     )
 
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "url": "https://example.com/spectra/sample.jdx",
+                    "capture_snapshot": False,
+                    "auto_processing": False,
+                    "auto_detection": False,
+                }
+            ]
+        }
+    }
+
 
 @router.get("/", include_in_schema=False)
 @router.get(
@@ -117,6 +144,57 @@ def run_command(
         )
 
 
+def run_publication_string_command(publication_string: str) -> str:
+    """Run nmr-cli parse-publication-string in the Docker container; return its raw JSON stdout."""
+
+    cmd = ["nmr-cli", "parse-publication-string", publication_string]
+
+    try:
+        result = subprocess.run(
+            ["docker", "exec", NMR_CLI_CONTAINER] + cmd,
+            capture_output=True,
+            text=False,
+            timeout=120
+        )
+    except subprocess.TimeoutExpired:
+        raise HTTPException(
+            status_code=408,
+            detail="Processing timeout exceeded"
+        )
+    except FileNotFoundError:
+        raise HTTPException(
+            status_code=500,
+            detail="Docker not found or nmr-converter container not running."
+        )
+
+    if result.returncode != 0:
+        error_msg = result.stderr.decode(
+            "utf-8") if result.stderr else "Unknown error"
+        raise HTTPException(
+            status_code=422,
+            detail=f"NMR CLI error: {error_msg}"
+        )
+
+    stdout = result.stdout.decode("utf-8").strip()
+
+    if not stdout:
+        raise HTTPException(
+            status_code=422,
+            detail="NMR CLI returned empty output. The publication string may be invalid or unrecognized."
+        )
+
+    # Confirm stdout parses as JSON; the parsed value is discarded and the raw text is returned
+    try:
+        json.loads(stdout)
+    except json.JSONDecodeError as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Invalid JSON from NMR CLI: {e}"
+        )
+
+    return stdout
+
+
 def copy_file_to_container(local_path: str, container_path: str) -> None:
     """Copy a file to the nmr-converter container."""
     try:
@@ -150,37 +228,52 @@ def remove_file_from_container(container_path: str) -> None:
 @router.post(
     "/parse/file",
     tags=["spectra"],
-    summary="Parse spectra from uploaded file",
-    response_description="Spectra data in JSON format",
+    summary="Parse spectra from an uploaded file",
+    description=(
+        "Upload an NMR spectra file (JCAMP-DX, Bruker, etc.) and parse it into "
+        "structured JSON. The file is processed by the **nmr-cli** tool running "
+        "inside a Docker container.\n\n"
+        "Supported file formats include JCAMP-DX (`.jdx`, `.dx`), Bruker directories "
+        "(zipped), and other formats supported by nmr-cli."
+    ),
+    response_description="Parsed spectra data in NMRium-compatible JSON format",
     status_code=status.HTTP_200_OK,
+    responses={
+        200: {"description": "Successfully parsed the spectra file"},
+        408: {"description": "Processing timeout exceeded (120s limit)"},
+        422: {"description": "Error parsing the spectra file"},
+        500: {"description": "Docker or nmr-converter container not available"},
+    },
 )
 async def parse_spectra_from_file(
-    file: UploadFile = File(..., description="Upload a spectra file"),
+    file: UploadFile = File(..., description="NMR spectra file to parse (JCAMP-DX, Bruker zip, etc.)"),
     capture_snapshot: bool = Form(
         False,
-        description="Generate an image snapshot of the spectra"
+        description="Generate an image snapshot of the spectra",
     ),
     auto_processing: bool = Form(
         False,
-        description="Enable automatic processing of spectrum (FID → FT spectra)"
+        description="Enable automatic processing of spectrum (FID → FT spectra)",
     ),
     auto_detection: bool = Form(
         False,
-        description="Enable ranges and zones automatic detection"
+        description="Enable ranges and zones automatic detection",
     ),
 ):
     """
-    ## Parse spectra from uploaded file
+    ## Parse spectra from an uploaded file
 
-    Upload a spectra file along with processing options using multipart/form-data.
+    Upload an NMR spectra file along with processing options using `multipart/form-data`.
 
-    Processing Options:
-    - `capture_snapshot (s)` : Capture snapshot of the spectra
-    - `auto_processing  (p)` : Enable automatic processing of spectrum (FID → FT spectra)
-    - `auto_detection   (d)` : Enable ranges and zones automatic detection
+    ### Processing Options
+    | Option | Description |
+    |--------|-------------|
+    | `capture_snapshot` | Capture an image snapshot of the spectra |
+    | `auto_processing` | Automatically process FID → FT spectra |
+    | `auto_detection` | Automatically detect ranges and zones |
 
-    Returns:
-        Spectra data in JSON format
+    ### Returns
+    Parsed spectra data in NMRium-compatible JSON format.
     """
 
     local_tmp_path = None
@@ -227,23 +320,36 @@ async def parse_spectra_from_file(
 @router.post(
     "/parse/url",
     tags=["spectra"],
-    summary="Parse spectra from URL",
-    response_description="Spectra data in JSON format",
+    summary="Parse spectra from a remote URL",
+    description=(
+        "Provide a URL pointing to an NMR spectra file and parse it into structured "
+        "JSON. The file is fetched and processed by the **nmr-cli** tool running "
+        "inside a Docker container."
+    ),
+    response_description="Parsed spectra data in NMRium-compatible JSON format",
     status_code=status.HTTP_200_OK,
+    responses={
+        200: {"description": "Successfully parsed the spectra from URL"},
+        408: {"description": "Processing timeout exceeded (120s limit)"},
+        422: {"description": "Error parsing spectra from the provided URL"},
+        500: {"description": "Docker or nmr-converter container not available"},
+    },
 )
 async def parse_spectra_from_url(request: UrlParseRequest):
     """
-    Parse spectra from URL
+    ## Parse spectra from a remote URL
 
-    Provide a URL to a spectra file along with processing options using JSON body.
+    Provide a URL to an NMR spectra file along with processing options in the JSON body.
 
-    Processing Options:
-    - `capture_snapshot (s)` : Capture snapshot of the spectra
-    - `auto_processing  (p)` : Enable automatic processing of spectrum (FID → FT spectra)
-    - `auto_detection   (d)` : Enable ranges and zones automatic detection
+    ### Processing Options
+    | Option | Description |
+    |--------|-------------|
+    | `capture_snapshot` | Capture an image snapshot of the spectra |
+    | `auto_processing` | Automatically process FID → FT spectra |
+    | `auto_detection` | Automatically detect ranges and zones |
 
-    Returns:
-        Spectra data in JSON format
+    ### Returns
+    Parsed spectra data in NMRium-compatible JSON format.
     """
     try:
         return run_command(
@@ -260,3 +366,83 @@ async def parse_spectra_from_url(request: UrlParseRequest):
             status_code=422,
             detail=f"Error parsing spectra from URL: {e}"
         )
+
+
+@router.post(
+    "/parse/publication-string",
+    tags=["spectra"],
+    summary="Resurrect NMR spectrum from an ACS publication string",
+    description=(
+        "Parse an ACS-style NMR publication string and resurrect it into a full "
+        "NMRium-compatible spectrum. The publication string is processed by the "
+        "**nmr-cli** `parse-publication-string` command running inside a Docker "
+        "container.\n\n"
+        "The string is parsed to extract nucleus, solvent, and chemical shift ranges, "
+        "which are then used to reconstruct the spectrum data (x/y arrays) at 400 MHz "
+        "with 131072 points.\n\n"
+        "### Example publication strings\n"
+        "- `1H NMR (400 MHz, CDCl3) δ 7.26 (s, 1H), 2.10 (s, 3H)`\n"
+        "- `13C NMR (101 MHz, DMSO-d6) δ 170.1, 136.5, 128.7`"
+    ),
+    response_description="Resurrected spectrum in NMRium-compatible JSON format",
+    status_code=status.HTTP_200_OK,
+    responses={
+        200: {"description": "Successfully resurrected spectrum from publication string"},
+        408: {"description": "Processing timeout exceeded (120s limit)"},
+        422: {"description": "Invalid publication string or NMR CLI error"},
+        500: {"description": "Docker or nmr-converter container not available"},
+    },
+)
+async def parse_publication_string(
+    publication_string: str = Body(
+        ...,
+        media_type="text/plain",
+        openapi_examples={
+            "1H proton": {
+                "summary": "1H NMR example",
+                "value": "1H NMR (400 MHz, CDCl3) δ 7.26 (s, 1H), 2.10 (s, 3H)",
+            },
+            "13C carbon": {
+                "summary": "13C NMR example",
+                "value": "13C NMR (101 MHz, DMSO-d6) δ 170.1, 136.5, 128.7",
+            },
+        },
+    ),
+):
+    """
+    ## Resurrect NMR spectrum from a publication string
+
+    Send the ACS-style NMR publication string directly as the request body
+    (plain text, no JSON wrapping).
+
+    ### Example request body
+    ```
+    1H NMR (400 MHz, CDCl3) δ 7.26 (s, 1H), 2.10 (s, 3H)
+    ```
+
+    ### Returns
+    NMRium-compatible JSON with spectrum data, ranges, and metadata.
+    """
+    if not publication_string or not publication_string.strip():
+        raise HTTPException(
+            status_code=422,
+            detail="Publication string cannot be empty."
+        )
+
+    try:
+        raw_json = run_publication_string_command(publication_string.strip())
+        return StreamingResponse(
+            io.BytesIO(raw_json.encode("utf-8")),
+            media_type="application/json",
+            headers={
+                "Content-Disposition": "attachment; filename=nmrium-spectrum.json",
+            },
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(
+            status_code=422,
+            detail=f"Error parsing publication string: {e}"
+        )