Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 55 additions & 2 deletions bento_lib/discovery/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from ..exceptions import DiscoveryValidationError
from .fields import FieldDefinition
from .overview import OverviewSection
from .overview import OverviewChart, OverviewSection
from .search import SearchSection
from ._internal import NoAdditionalProperties

Expand All @@ -17,6 +17,8 @@
"DiscoveryConfig",
]

CHART_DEF_NOT_FOUND = "chart definition not found"
CHART_ALREADY_SEEN = "chart already seen"
FIELD_DEF_NOT_FOUND = "field definition not found"
FIELD_ALREADY_SEEN = "field already seen"

Expand Down Expand Up @@ -84,6 +86,25 @@ class DiscoveryConfig(BaseModel, NoAdditionalProperties):
# information about who prepared the discovery configuration and when it was generated.
metadata: DiscoveryConfigMetadata = DiscoveryConfigMetadata()

# Shared chart definitions. Dictionary of {chart_id: chart definition}. For now, these are just used for catalogue
# charts, but in the long run they should be used for overview charts as well.
charts: dict[str, OverviewChart] = Field(
default_factory=dict,
title="Chart definitions",
description="Chart definitions for use in catalogue_charts. These are not yet used for overview charts.",
)

# TODO: A future breaking change could move chart specification to be entirely in the charts field above, rather
# than fixed into a layout, and then the same chart definition could be used for both catalogue and overview.

# Ordered list of chart IDs for the catalogue view. Each ID must be a key of `charts` and may appear only once;
# both conditions are enforced by _check_catalogue_chart_references() during model validation.
catalogue_charts: list[str] = Field(
default_factory=list,
title="Catalogue charts",
description=(
"Chart IDs to show in the data catalogue view (if enabled.) This only applies for an instance-level "
"configuration file; in any other scope, it will be ignored."
),
)
overview: list[OverviewSection] = []
search: list[SearchSection] = []
fields: dict[str, FieldDefinition] = {}
Expand All @@ -95,6 +116,34 @@ class DiscoveryConfig(BaseModel, NoAdditionalProperties):

# Validators -------------------------------------------------------------------------------------------------------

def _check_chart_definitions(self):
seen_chart_fields: set[str] = set()
for chart_id, chart in self.charts.items():
exc_path = f"charts > {chart_id}"
log_data = dict(chart_id=chart_id, field_id=chart.field)
if chart.field not in self.fields:
raise DiscoveryValidationError(FIELD_DEF_NOT_FOUND, exc_path, log_data)
if chart.field in seen_chart_fields:
raise DiscoveryValidationError(FIELD_ALREADY_SEEN, exc_path, log_data)
seen_chart_fields.add(chart.field)

def _check_catalogue_chart_references(self):
"""
Validate overview and check for chart duplicates.
Raises a DiscoveryValidationError if an error is found; otherwise, does nothing.
"""

seen_charts: set[str] = set()

for c_idx, chart_id in enumerate(self.catalogue_charts):
exc_path = f"catalogue_charts > {chart_id} [{c_idx}]"
log_data = dict(chart_id=chart_id, chart_idx=c_idx)
if chart_id not in self.charts:
raise DiscoveryValidationError(CHART_DEF_NOT_FOUND, exc_path, log_data)
if chart_id in seen_charts:
raise DiscoveryValidationError(CHART_ALREADY_SEEN, exc_path, log_data)
seen_charts.add(chart_id)

def _check_overview_field_references(self):
"""
Validate overview and check for chart duplicates.
Expand Down Expand Up @@ -131,17 +180,21 @@ def _check_search_field_references(self):

# Model-level validator registered in "after" mode: runs the cross-reference checks below in a fixed order and
# returns the instance itself. The chart-definition and catalogue-chart checks raise DiscoveryValidationError on the
# first problem they find, so the order of these calls decides which error is reported when several are present.
@model_validator(mode="after")
def check_field_references(self) -> Self:
self._check_chart_definitions()
self._check_catalogue_chart_references()
self._check_overview_field_references()
self._check_search_field_references()
return self

# Methods ----------------------------------------------------------------------------------------------------------

def get_chart_field_ids(self) -> tuple[str, ...]:
def get_chart_field_ids(self, catalogue_mode: bool = False) -> tuple[str, ...]:
"""
Gets all field IDs used by charts.
:return: A tuple of field IDs.
"""
if catalogue_mode:
return tuple(self.charts[chart_id].field for chart_id in self.catalogue_charts)
return tuple(chart.field for section in self.overview for chart in section.charts)

def get_searchable_field_ids(self) -> tuple[str, ...]:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bento-lib"
version = "15.1.1"
version = "15.2.0"
description = "A set of common utilities and helpers for Bento platform services."
authors = [
"David Lougheed <david.lougheed@mail.mcgill.ca>",
Expand Down
8 changes: 8 additions & 0 deletions tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
"DISCOVERY_CONFIG_INVALID_3_PATH",
"DISCOVERY_CONFIG_INVALID_4_PATH",
"DISCOVERY_CONFIG_INVALID_5_PATH",
"DISCOVERY_CONFIG_INVALID_6_PATH",
"DISCOVERY_CONFIG_INVALID_7_PATH",
"DISCOVERY_CONFIG_INVALID_8_PATH",
"DISCOVERY_CONFIG_INVALID_9_PATH",
"DISCOVERY_CONFIG_WARNING_PATH",
"SARS_COV_2_FASTA_PATH",
"WDL_DIR",
Expand Down Expand Up @@ -55,6 +59,10 @@
DISCOVERY_CONFIG_INVALID_3_PATH = DATA_DIR / "discovery_config_invalid_3.json"
DISCOVERY_CONFIG_INVALID_4_PATH = DATA_DIR / "discovery_config_invalid_4.json"
DISCOVERY_CONFIG_INVALID_5_PATH = DATA_DIR / "discovery_config_invalid_5.json"
DISCOVERY_CONFIG_INVALID_6_PATH = DATA_DIR / "discovery_config_invalid_6.json"
DISCOVERY_CONFIG_INVALID_7_PATH = DATA_DIR / "discovery_config_invalid_7.json"
DISCOVERY_CONFIG_INVALID_8_PATH = DATA_DIR / "discovery_config_invalid_8.json"
DISCOVERY_CONFIG_INVALID_9_PATH = DATA_DIR / "discovery_config_invalid_9.json"
DISCOVERY_CONFIG_WARNING_PATH = DATA_DIR / "discovery_config_warning.json"
SARS_COV_2_FASTA_PATH = DATA_DIR / "sars_cov_2.fa"

Expand Down
59 changes: 59 additions & 0 deletions tests/data/discovery_config_invalid_6.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"version": "1",
"metadata": {
"description": "An invalid test discovery configuration",
"authors": ["David Lougheed <david.lougheed@mail.mcgill.ca>"],
"timestamp": "2025-04-17T10:56:58Z"
},
"charts": {
"age": {
"field": "age",
"chart_type": "histogram"
},
"sex": {
"field": "sex",
"chart_type": "pie"
}
},
"catalogue_charts": ["age", "sex", "date_of_consent"],
"overview": [],
"search": [],
"fields": {
"age": {
"mapping": "individual/age_numeric",
"title": "Age",
"description": "Age at arrival",
"datatype": "number",
"config": {
"bin_size": 10,
"taper_left": 10,
"taper_right": 100,
"units": "years",
"minimum": 0,
"maximum": 100
}
},
"sex": {
"mapping": "individual/sex",
"title": "Sex",
"description": "Sex at birth",
"datatype": "string",
"config": {
"enum": null
}
},
"date_of_consent": {
"mapping": "individual/extra_properties/date_of_consent",
"title": "Verbal consent date",
"description": "Date of initial verbal consent (participant, legal representative or tutor), yyyy-mm-dd",
"datatype": "date",
"config": {
"bin_by": "month"
}
}
},
"rules": {
"count_threshold": 5,
"max_query_parameters": 2
}
}
59 changes: 59 additions & 0 deletions tests/data/discovery_config_invalid_7.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"version": "1",
"metadata": {
"description": "An invalid test discovery configuration",
"authors": ["David Lougheed <david.lougheed@mail.mcgill.ca>"],
"timestamp": "2025-04-17T10:56:58Z"
},
"charts": {
"age": {
"field": "ageeeee",
"chart_type": "histogram"
},
"sex": {
"field": "sex",
"chart_type": "pie"
}
},
"catalogue_charts": ["age", "sex"],
"overview": [],
"search": [],
"fields": {
"age": {
"mapping": "individual/age_numeric",
"title": "Age",
"description": "Age at arrival",
"datatype": "number",
"config": {
"bin_size": 10,
"taper_left": 10,
"taper_right": 100,
"units": "years",
"minimum": 0,
"maximum": 100
}
},
"sex": {
"mapping": "individual/sex",
"title": "Sex",
"description": "Sex at birth",
"datatype": "string",
"config": {
"enum": null
}
},
"date_of_consent": {
"mapping": "individual/extra_properties/date_of_consent",
"title": "Verbal consent date",
"description": "Date of initial verbal consent (participant, legal representative or tutor), yyyy-mm-dd",
"datatype": "date",
"config": {
"bin_by": "month"
}
}
},
"rules": {
"count_threshold": 5,
"max_query_parameters": 2
}
}
63 changes: 63 additions & 0 deletions tests/data/discovery_config_invalid_8.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"version": "1",
"metadata": {
"description": "An invalid test discovery configuration",
"authors": ["David Lougheed <david.lougheed@mail.mcgill.ca>"],
"timestamp": "2025-04-17T10:56:58Z"
},
"charts": {
"age": {
"field": "age",
"chart_type": "histogram"
},
"age2": {
"field": "age",
"chart_type": "histogram"
},
"sex": {
"field": "sex",
"chart_type": "pie"
}
},
"catalogue_charts": ["age", "sex"],
"overview": [],
"search": [],
"fields": {
"age": {
"mapping": "individual/age_numeric",
"title": "Age",
"description": "Age at arrival",
"datatype": "number",
"config": {
"bin_size": 10,
"taper_left": 10,
"taper_right": 100,
"units": "years",
"minimum": 0,
"maximum": 100
}
},
"sex": {
"mapping": "individual/sex",
"title": "Sex",
"description": "Sex at birth",
"datatype": "string",
"config": {
"enum": null
}
},
"date_of_consent": {
"mapping": "individual/extra_properties/date_of_consent",
"title": "Verbal consent date",
"description": "Date of initial verbal consent (participant, legal representative or tutor), yyyy-mm-dd",
"datatype": "date",
"config": {
"bin_by": "month"
}
}
},
"rules": {
"count_threshold": 5,
"max_query_parameters": 2
}
}
59 changes: 59 additions & 0 deletions tests/data/discovery_config_invalid_9.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"version": "1",
"metadata": {
"description": "An invalid test discovery configuration",
"authors": ["David Lougheed <david.lougheed@mail.mcgill.ca>"],
"timestamp": "2025-04-17T10:56:58Z"
},
"charts": {
"age": {
"field": "age",
"chart_type": "histogram"
},
"sex": {
"field": "sex",
"chart_type": "pie"
}
},
"catalogue_charts": ["age", "sex", "sex"],
"overview": [],
"search": [],
"fields": {
"age": {
"mapping": "individual/age_numeric",
"title": "Age",
"description": "Age at arrival",
"datatype": "number",
"config": {
"bin_size": 10,
"taper_left": 10,
"taper_right": 100,
"units": "years",
"minimum": 0,
"maximum": 100
}
},
"sex": {
"mapping": "individual/sex",
"title": "Sex",
"description": "Sex at birth",
"datatype": "string",
"config": {
"enum": null
}
},
"date_of_consent": {
"mapping": "individual/extra_properties/date_of_consent",
"title": "Verbal consent date",
"description": "Date of initial verbal consent (participant, legal representative or tutor), yyyy-mm-dd",
"datatype": "date",
"config": {
"bin_by": "month"
}
}
},
"rules": {
"count_threshold": 5,
"max_query_parameters": 2
}
}
Loading
Loading