Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 43 additions & 15 deletions bento_reference_service/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
import json
from bento_lib.db.pg_async import PgAsyncDatabase
from fastapi import Depends
from functools import lru_cache
from functools import lru_cache, partial
from pathlib import Path
from structlog.stdlib import BoundLogger
from typing import Annotated, AsyncIterator, Literal
from typing import Annotated, AsyncIterator, Literal, LiteralString

from .config import Config, ConfigDependency
from .logger import LoggerDependency
from .models import (
Alias,
ContigWithRefgetURI,
ContigLink,
Genome,
GenomeWithURIs,
GenomeGFF3Patch,
Expand Down Expand Up @@ -64,16 +65,27 @@ def deserialize_contig(self, rec: asyncpg.Record | dict) -> ContigWithRefgetURI:
),
)

def deserialize_genome(self, rec: asyncpg.Record, external_resource_uris: bool) -> GenomeWithURIs:
def deserialize_contig_link(self, genome_id: str, rec: asyncpg.Record | dict) -> ContigLink:
    """Build a name-plus-link stand-in for a contig instead of a full contig record.

    Only the ``contig_name`` key of ``rec`` is read. The link points at this
    service's contig detail path for the given genome.
    """
    contig_name: str = rec["contig_name"]
    contig_href = f"{self._service_base_url}/genomes/{genome_id}/contigs/{contig_name}"
    return ContigLink(name=contig_name, href=contig_href)

def deserialize_genome(
self, rec: asyncpg.Record, external_resource_uris: bool, full_contigs: bool = True
) -> GenomeWithURIs:
genome_id = rec["id"]
genome_uri = f"{self._service_base_url}/genomes/{genome_id}"

if full_contigs:
deserialize_contig = self.deserialize_contig
else:
deserialize_contig = partial(self.deserialize_contig_link, genome_id)

return GenomeWithURIs(
id=genome_id,
# aliases is [None] if no aliases defined:
aliases=tuple(map(Database.deserialize_alias, json.loads(rec["aliases"]))) if rec["aliases"] else (),
uri=genome_uri,
contigs=tuple(map(self.deserialize_contig, json.loads(rec["contigs"]))),
contigs=tuple(map(deserialize_contig, json.loads(rec["contigs"]))),
md5=rec["md5_checksum"],
ga4gh=rec["ga4gh_checksum"],
fasta=f"{genome_uri}.fa" if external_resource_uris else rec["fasta_uri"],
Expand All @@ -96,6 +108,7 @@ async def _select_genomes(
g_ids: list[str] | None,
taxon_id: str | None = None,
external_resource_uris: bool = False,
full_contigs: bool = True,
) -> AsyncIterator[GenomeWithURIs]:
where_items: list[str] = []
q_params: list[str | int] = []
Expand All @@ -111,6 +124,21 @@ def _q_param(pv: str | int) -> str:
if taxon_id:
where_items.append(f"taxon_id = {_q_param(taxon_id)}")

contig_select_items: LiteralString = (
(
"""
contig_name, contig_length, circular, md5_checksum, ga4gh_checksum,
(
SELECT jsonb_agg(gca.*)
FROM genome_contig_aliases gca
WHERE g.id = gca.genome_id AND gc.contig_name = gca.contig_name
) aliases
"""
)
if full_contigs
else "contig_name"
)

conn: asyncpg.Connection
async with self.connect() as conn:
res = await conn.fetch(
Expand All @@ -130,13 +158,7 @@ def _q_param(pv: str | int) -> str:
) aliases,
(
WITH contigs_tmp AS (
SELECT
contig_name, contig_length, circular, md5_checksum, ga4gh_checksum,
(
SELECT jsonb_agg(gca.*)
FROM genome_contig_aliases gca
WHERE g.id = gca.genome_id AND gc.contig_name = gca.contig_name
) aliases
SELECT {contig_select_items}
FROM genome_contigs gc WHERE g.id = gc.genome_id
)
SELECT jsonb_agg(contigs_tmp.*) FROM contigs_tmp
Expand All @@ -146,16 +168,22 @@ def _q_param(pv: str | int) -> str:
*q_params,
)

for r in map(lambda g: self.deserialize_genome(g, external_resource_uris), res):
for r in map(lambda g: self.deserialize_genome(g, external_resource_uris, full_contigs), res):
yield r

# Collect every genome produced by _select_genomes for the optional ID / taxon filters into a tuple.
# NOTE(review): this span comes from a rendered diff, so two `return` statements appear back to back.
# Presumably the single-line one is the replaced version and the multi-line one (which passes
# full_contigs=False, i.e. link-only contigs) is its replacement — confirm against the real file.
async def get_genomes(
self, g_ids: list[str] | None = None, taxon_id: str | None = None, external_resource_uris: bool = False
) -> tuple[GenomeWithURIs, ...]:
return tuple([r async for r in self._select_genomes(g_ids, taxon_id, external_resource_uris)])
return tuple(
[r async for r in self._select_genomes(g_ids, taxon_id, external_resource_uris, full_contigs=False)]
)

async def get_genome(self, g_id: str, *, external_resource_uris: bool = False) -> GenomeWithURIs | None:
return await anext(self._select_genomes([g_id], external_resource_uris=external_resource_uris), None)
async def get_genome(
    self, g_id: str, *, external_resource_uris: bool = False, full_contigs: bool = False
) -> GenomeWithURIs | None:
    """Fetch a single genome by ID, or ``None`` when the query produces no rows.

    Both keyword flags are forwarded unchanged to ``_select_genomes``.
    """
    matches = self._select_genomes(
        [g_id],
        external_resource_uris=external_resource_uris,
        full_contigs=full_contigs,
    )
    # Only the first result is consumed; the None default covers an empty result set.
    return await anext(matches, None)

async def delete_genome(self, g_id: str) -> None:
conn: asyncpg.Connection
Expand Down
10 changes: 8 additions & 2 deletions bento_reference_service/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"Alias",
"Contig",
"ContigWithRefgetURI",
"ContigLink",
"Genome",
"GenomeWithURIs",
"GenomeGFF3Patch",
Expand Down Expand Up @@ -51,6 +52,11 @@ class ContigWithRefgetURI(Contig):
refget_uris: tuple[str, ...]


# Lightweight contig representation: just the contig's name plus a link to its record in this service.
# (Documented with comments rather than a docstring so the model's generated schema is unaffected.)
class ContigLink(BaseModel):
# Name of the contig.
name: str
# Link to the contig's record in this service; titled "HREF" in the schema.
href: str = Field(..., title="HREF", description="Link to service contig record")


class Genome(BaseModel):
id: str
aliases: tuple[Alias, ...] = ()
Expand All @@ -70,9 +76,9 @@ class Genome(BaseModel):
contigs: tuple[Contig, ...]


class GenomeWithURIs(Genome):
class GenomeWithURIs[C: ContigLink | ContigWithRefgetURI](Genome):
uri: str
contigs: tuple[ContigWithRefgetURI, ...]
contigs: tuple[C, ...]
resources: tuple[OntologyResource, ...] = (NCBI_TAXON_2025_12_03,) # For resolving taxon.id CURIE


Expand Down
31 changes: 25 additions & 6 deletions bento_reference_service/routers/genomes.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
from __future__ import annotations

import asyncpg
import traceback

from datetime import datetime
from fastapi import APIRouter, HTTPException, Query, Request, status
from fastapi.responses import StreamingResponse
from typing import Annotated
from typing import Annotated, Literal, overload

from .. import models as m
from ..authz import authz_middleware
from ..config import ConfigDependency
from ..db import Database, DatabaseDependency
from ..drs import DrsResolverDependency
from ..logger import LoggerDependency
from ..models import ContigWithRefgetURI, ContigLink
from ..streaming import generate_uri_streaming_response
from .constants import DEPENDENCY_DELETE_REFERENCE_MATERIAL, DEPENDENCY_INGEST_REFERENCE_MATERIAL

Expand All @@ -22,10 +25,24 @@
genome_router = APIRouter(prefix="/genomes")


@overload
async def get_genome_or_raise_404(
db: Database, genome_id: str, external_resource_uris: bool = True
) -> m.GenomeWithURIs:
genome: m.GenomeWithURIs = await db.get_genome(genome_id, external_resource_uris=external_resource_uris)
db: Database, genome_id: str, external_resource_uris: bool = True, full_contigs: Literal[False] = False
) -> m.GenomeWithURIs[m.ContigLink]: ...


# Typing overloads for full_contigs=True (full contig records in the result).
# No default is declared for full_contigs here: the implementation defaults to False, so a
# call that omits the flag must never be typed as returning full contig records.
# Two variants are needed so that both positional and keyword call shapes stay typed.
@overload
async def get_genome_or_raise_404(
    db: Database, genome_id: str, external_resource_uris: bool, full_contigs: Literal[True]
) -> m.GenomeWithURIs[m.ContigWithRefgetURI]: ...


@overload
async def get_genome_or_raise_404(
    db: Database, genome_id: str, external_resource_uris: bool = True, *, full_contigs: Literal[True]
) -> m.GenomeWithURIs[m.ContigWithRefgetURI]: ...


async def get_genome_or_raise_404[C: ContigWithRefgetURI | ContigLink](
db: Database, genome_id: str, external_resource_uris: bool = True, full_contigs: bool = False
) -> m.GenomeWithURIs[C]:
genome: m.GenomeWithURIs[C] = await db.get_genome(
genome_id, external_resource_uris=external_resource_uris, full_contigs=full_contigs
)
if genome is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Genome with ID {genome_id} not found")
return genome
Expand Down Expand Up @@ -154,14 +171,16 @@ async def genomes_delete(genome_id: str, db: DatabaseDependency):

# GET /{genome_id}/contigs: return the `contigs` tuple of the genome looked up via get_genome_or_raise_404.
# NOTE(review): this span comes from a rendered diff, so two `return` statements appear back to back.
# Presumably the first is the replaced line and the second (which passes full_contigs=True, matching
# the ContigWithRefgetURI return annotation) is its replacement — confirm against the real file.
@genome_router.get("/{genome_id}/contigs", dependencies=[authz_middleware.dep_public_endpoint()])
async def genomes_detail_contigs(genome_id: str, db: DatabaseDependency) -> tuple[m.ContigWithRefgetURI, ...]:
return (await get_genome_or_raise_404(db, genome_id)).contigs
return (await get_genome_or_raise_404(db, genome_id, full_contigs=True)).contigs


@genome_router.get("/{genome_id}/contigs/{contig_name}", dependencies=[authz_middleware.dep_public_endpoint()])
async def genomes_detail_contig_detail(
genome_id: str, contig_name: str, db: DatabaseDependency
) -> m.ContigWithRefgetURI:
genome: m.GenomeWithURIs = await get_genome_or_raise_404(db, genome_id)
# TODO: rewrite to do less DB/deserialization work

genome: m.GenomeWithURIs = await get_genome_or_raise_404(db, genome_id, full_contigs=True)

contig: m.ContigWithRefgetURI | None = next((c for c in genome.contigs if c.name == contig_name), None)
if contig is None:
Expand Down
Loading