diff --git a/bento_lib/discovery/models/config.py b/bento_lib/discovery/models/config.py index 6a7fbdd5..339b3f16 100644 --- a/bento_lib/discovery/models/config.py +++ b/bento_lib/discovery/models/config.py @@ -5,7 +5,7 @@ from ..exceptions import DiscoveryValidationError from .fields import FieldDefinition -from .overview import OverviewSection +from .overview import OverviewChart, OverviewSection from .search import SearchSection from ._internal import NoAdditionalProperties @@ -17,6 +17,8 @@ "DiscoveryConfig", ] +CHART_DEF_NOT_FOUND = "chart definition not found" +CHART_ALREADY_SEEN = "chart already seen" FIELD_DEF_NOT_FOUND = "field definition not found" FIELD_ALREADY_SEEN = "field already seen" @@ -84,6 +86,25 @@ class DiscoveryConfig(BaseModel, NoAdditionalProperties): # information about who prepared the discovery configuration and when it was generated. metadata: DiscoveryConfigMetadata = DiscoveryConfigMetadata() + # Shared chart definitions. Dictionary of {chart_id: chart definition}. For now, these are just used for catalogue + # charts, but in the long run they should be used for overview charts as well. + charts: dict[str, OverviewChart] = Field( + default_factory=dict, + title="Chart definitions", + description="Chart definitions for use in catalogue_charts. These are not yet used for overview charts.", + ) + + # TODO: A future breaking change could move chart specification to be entirely in the charts field above, rather + # than fixed into a layout, and then the same chart definition could be used for both catalogue and overview. + + catalogue_charts: list[str] = Field( + default_factory=list, + title="Catalogue charts", + description=( + "Chart IDs to show in the data catalogue view (if enabled.) This only applies for an instance-level " + "configuration file; in any other scope, it will be ignored." + ), + ) overview: list[OverviewSection] = [] search: list[SearchSection] = [] fields: dict[str, FieldDefinition] = {} @@ -95,6 +116,34 @@ class DiscoveryConfig(BaseModel, NoAdditionalProperties): # Validators ------------------------------------------------------------------------------------------------------- + def _check_chart_definitions(self): + seen_chart_fields: set[str] = set() + for chart_id, chart in self.charts.items(): + exc_path = f"charts > {chart_id}" + log_data = dict(chart_id=chart_id, field_id=chart.field) + if chart.field not in self.fields: + raise DiscoveryValidationError(FIELD_DEF_NOT_FOUND, exc_path, log_data) + if chart.field in seen_chart_fields: + raise DiscoveryValidationError(FIELD_ALREADY_SEEN, exc_path, log_data) + seen_chart_fields.add(chart.field) + + def _check_catalogue_chart_references(self): + """ + Validate overview and check for chart duplicates. + Raises a DiscoveryValidationError if an error is found; otherwise, does nothing. + """ + + seen_charts: set[str] = set() + + for c_idx, chart_id in enumerate(self.catalogue_charts): + exc_path = f"catalogue_charts > {chart_id} [{c_idx}]" + log_data = dict(chart_id=chart_id, chart_idx=c_idx) + if chart_id not in self.charts: + raise DiscoveryValidationError(CHART_DEF_NOT_FOUND, exc_path, log_data) + if chart_id in seen_charts: + raise DiscoveryValidationError(CHART_ALREADY_SEEN, exc_path, log_data) + seen_charts.add(chart_id) + def _check_overview_field_references(self): """ Validate overview and check for chart duplicates. @@ -131,17 +180,21 @@ def _check_search_field_references(self): @model_validator(mode="after") def check_field_references(self) -> Self: + self._check_chart_definitions() + self._check_catalogue_chart_references() self._check_overview_field_references() self._check_search_field_references() return self # Methods ---------------------------------------------------------------------------------------------------------- - def get_chart_field_ids(self) -> tuple[str, ...]: + def get_chart_field_ids(self, catalogue_mode: bool = False) -> tuple[str, ...]: """ Gets all field IDs used by charts. :return: A tuple of field IDs. """ + if catalogue_mode: + return tuple(self.charts[chart_id].field for chart_id in self.catalogue_charts) return tuple(chart.field for section in self.overview for chart in section.charts) def get_searchable_field_ids(self) -> tuple[str, ...]: diff --git a/pyproject.toml b/pyproject.toml index 2e9e1a98..d1ce7661 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bento-lib" -version = "15.1.1" +version = "15.2.0" description = "A set of common utilities and helpers for Bento platform services." authors = [ "David Lougheed ", diff --git a/tests/common.py b/tests/common.py index 7a8f32ed..f1febb8a 100644 --- a/tests/common.py +++ b/tests/common.py @@ -17,6 +17,10 @@ "DISCOVERY_CONFIG_INVALID_3_PATH", "DISCOVERY_CONFIG_INVALID_4_PATH", "DISCOVERY_CONFIG_INVALID_5_PATH", + "DISCOVERY_CONFIG_INVALID_6_PATH", + "DISCOVERY_CONFIG_INVALID_7_PATH", + "DISCOVERY_CONFIG_INVALID_8_PATH", + "DISCOVERY_CONFIG_INVALID_9_PATH", "DISCOVERY_CONFIG_WARNING_PATH", "SARS_COV_2_FASTA_PATH", "WDL_DIR", @@ -55,6 +59,10 @@ DISCOVERY_CONFIG_INVALID_3_PATH = DATA_DIR / "discovery_config_invalid_3.json" DISCOVERY_CONFIG_INVALID_4_PATH = DATA_DIR / "discovery_config_invalid_4.json" DISCOVERY_CONFIG_INVALID_5_PATH = DATA_DIR / "discovery_config_invalid_5.json" +DISCOVERY_CONFIG_INVALID_6_PATH = DATA_DIR / "discovery_config_invalid_6.json" +DISCOVERY_CONFIG_INVALID_7_PATH = DATA_DIR / "discovery_config_invalid_7.json" +DISCOVERY_CONFIG_INVALID_8_PATH = DATA_DIR / "discovery_config_invalid_8.json" +DISCOVERY_CONFIG_INVALID_9_PATH = DATA_DIR / "discovery_config_invalid_9.json" DISCOVERY_CONFIG_WARNING_PATH = DATA_DIR / "discovery_config_warning.json" SARS_COV_2_FASTA_PATH = DATA_DIR / "sars_cov_2.fa" diff --git a/tests/data/discovery_config_invalid_6.json b/tests/data/discovery_config_invalid_6.json new file mode 100644 index 00000000..0af33d1b --- /dev/null +++ b/tests/data/discovery_config_invalid_6.json @@ -0,0 +1,59 @@ +{ + "version": "1", + "metadata": { + "description": "An invalid test discovery configuration", + "authors": ["David Lougheed "], + "timestamp": "2025-04-17T10:56:58Z" + }, + "charts": { + "age": { + "field": "age", + "chart_type": "histogram" + }, + "sex": { + "field": "sex", + "chart_type": "pie" + } + }, + "catalogue_charts": ["age", "sex", "date_of_consent"], + "overview": [], + "search": [], + "fields": { + "age": { + "mapping": "individual/age_numeric", + "title": "Age", + "description": "Age at arrival", + "datatype": "number", + "config": { + "bin_size": 10, + "taper_left": 10, + "taper_right": 100, + "units": "years", + "minimum": 0, + "maximum": 100 + } + }, + "sex": { + "mapping": "individual/sex", + "title": "Sex", + "description": "Sex at birth", + "datatype": "string", + "config": { + "enum": null + } + }, + "date_of_consent": { + "mapping": "individual/extra_properties/date_of_consent", + "title": "Verbal consent date", + "description": "Date of initial verbal consent (participant, legal representative or tutor), yyyy-mm-dd", + "datatype": "date", + "config": { + "bin_by": "month" + } + } + }, + "rules": { + "count_threshold": 5, + "max_query_parameters": 2 + } +} diff --git a/tests/data/discovery_config_invalid_7.json b/tests/data/discovery_config_invalid_7.json new file mode 100644 index 00000000..06d59426 --- /dev/null +++ b/tests/data/discovery_config_invalid_7.json @@ -0,0 +1,59 @@ +{ + "version": "1", + "metadata": { + "description": "An invalid test discovery configuration", + "authors": ["David Lougheed "], + "timestamp": "2025-04-17T10:56:58Z" + }, + "charts": { + "age": { + "field": "ageeeee", + "chart_type": "histogram" + }, + "sex": { + "field": "sex", + "chart_type": "pie" + } + }, + "catalogue_charts": ["age", "sex"], + "overview": [], + "search": [], + "fields": { + "age": { + "mapping": "individual/age_numeric", + "title": "Age", + "description": "Age at arrival", + "datatype": "number", + "config": { + "bin_size": 10, + "taper_left": 10, + "taper_right": 100, + "units": "years", + "minimum": 0, + "maximum": 100 + } + }, + "sex": { + "mapping": "individual/sex", + "title": "Sex", + "description": "Sex at birth", + "datatype": "string", + "config": { + "enum": null + } + }, + "date_of_consent": { + "mapping": "individual/extra_properties/date_of_consent", + "title": "Verbal consent date", + "description": "Date of initial verbal consent (participant, legal representative or tutor), yyyy-mm-dd", + "datatype": "date", + "config": { + "bin_by": "month" + } + } + }, + "rules": { + "count_threshold": 5, + "max_query_parameters": 2 + } +} diff --git a/tests/data/discovery_config_invalid_8.json b/tests/data/discovery_config_invalid_8.json new file mode 100644 index 00000000..360d8bc0 --- /dev/null +++ b/tests/data/discovery_config_invalid_8.json @@ -0,0 +1,63 @@ +{ + "version": "1", + "metadata": { + "description": "An invalid test discovery configuration", + "authors": ["David Lougheed "], + "timestamp": "2025-04-17T10:56:58Z" + }, + "charts": { + "age": { + "field": "age", + "chart_type": "histogram" + }, + "age2": { + "field": "age", + "chart_type": "histogram" + }, + "sex": { + "field": "sex", + "chart_type": "pie" + } + }, + "catalogue_charts": ["age", "sex"], + "overview": [], + "search": [], + "fields": { + "age": { + "mapping": "individual/age_numeric", + "title": "Age", + "description": "Age at arrival", + "datatype": "number", + "config": { + "bin_size": 10, + "taper_left": 10, + "taper_right": 100, + "units": "years", + "minimum": 0, + "maximum": 100 + } + }, + "sex": { + "mapping": "individual/sex", + "title": "Sex", + "description": "Sex at birth", + "datatype": "string", + "config": { + "enum": null + } + }, + "date_of_consent": { + "mapping": "individual/extra_properties/date_of_consent", + "title": "Verbal consent date", + "description": "Date of initial verbal consent (participant, legal representative or tutor), yyyy-mm-dd", + "datatype": "date", + "config": { + "bin_by": "month" + } + } + }, + "rules": { + "count_threshold": 5, + "max_query_parameters": 2 + } +} diff --git a/tests/data/discovery_config_invalid_9.json b/tests/data/discovery_config_invalid_9.json new file mode 100644 index 00000000..8e75bd32 --- /dev/null +++ b/tests/data/discovery_config_invalid_9.json @@ -0,0 +1,59 @@ +{ + "version": "1", + "metadata": { + "description": "An invalid test discovery configuration", + "authors": ["David Lougheed "], + "timestamp": "2025-04-17T10:56:58Z" + }, + "charts": { + "age": { + "field": "age", + "chart_type": "histogram" + }, + "sex": { + "field": "sex", + "chart_type": "pie" + } + }, + "catalogue_charts": ["age", "sex", "sex"], + "overview": [], + "search": [], + "fields": { + "age": { + "mapping": "individual/age_numeric", + "title": "Age", + "description": "Age at arrival", + "datatype": "number", + "config": { + "bin_size": 10, + "taper_left": 10, + "taper_right": 100, + "units": "years", + "minimum": 0, + "maximum": 100 + } + }, + "sex": { + "mapping": "individual/sex", + "title": "Sex", + "description": "Sex at birth", + "datatype": "string", + "config": { + "enum": null + } + }, + "date_of_consent": { + "mapping": "individual/extra_properties/date_of_consent", + "title": "Verbal consent date", + "description": "Date of initial verbal consent (participant, legal representative or tutor), yyyy-mm-dd", + "datatype": "date", + "config": { + "bin_by": "month" + } + } + }, + "rules": { + "count_threshold": 5, + "max_query_parameters": 2 + } +} diff --git a/tests/test_discovery_config.py b/tests/test_discovery_config.py index 149e409d..2ad94d95 100644 --- a/tests/test_discovery_config.py +++ b/tests/test_discovery_config.py @@ -20,10 +20,30 @@ DISCOVERY_CONFIG_INVALID_3_PATH, DISCOVERY_CONFIG_INVALID_4_PATH, DISCOVERY_CONFIG_INVALID_5_PATH, + DISCOVERY_CONFIG_INVALID_6_PATH, + DISCOVERY_CONFIG_INVALID_7_PATH, + DISCOVERY_CONFIG_INVALID_8_PATH, + DISCOVERY_CONFIG_INVALID_9_PATH, DISCOVERY_CONFIG_WARNING_PATH, ) +AGE_NUMERIC_FIELD_DICT = { + "mapping": "individual/age_numeric", + "title": "Age", + "description": "Age at arrival", + "datatype": "number", + "minimum_permissions": "data", + "config": { + "bin_size": 10, + "taper_left": 10, + "taper_right": 100, + "units": "years", + "minimum": 0, + "maximum": 100, + }, +} + AGE_HISTOGRAM_BASIC_DICT = { "field": "age", "chart_type": "histogram", @@ -83,12 +103,14 @@ def test_overview_charts_def(): def test_load_discovery_config_dict(): cfg, warnings = load_discovery_config_from_dict( { + "charts": { + "age": AGE_HISTOGRAM_BASIC_DICT, + }, + "catalogue_charts": ["age"], "overview": [ { "section_title": "General", - "charts": [ - {"field": "age", "chart_type": "histogram"}, - ], + "charts": [AGE_HISTOGRAM_BASIC_DICT], "default_charts": ["age"], } ], @@ -101,26 +123,14 @@ def test_load_discovery_config_dict(): }, ], "fields": { - "age": { - "mapping": "individual/age_numeric", - "title": "Age", - "description": "Age at arrival", - "datatype": "number", - "minimum_permissions": "data", - "config": { - "bin_size": 10, - "taper_left": 10, - "taper_right": 100, - "units": "years", - "minimum": 0, - "maximum": 100, - }, - }, + "age": AGE_NUMERIC_FIELD_DICT, }, "rules": {"count_threshold": 5, "max_query_parameters": 2}, } ) + assert len(cfg.charts) == 1 + assert len(cfg.catalogue_charts) == 1 assert len(cfg.overview) == 1 assert cfg.get_chart_field_ids() == ("age",) assert cfg.get_searchable_field_ids() == ("age",) @@ -151,9 +161,14 @@ def test_load_discovery_config_dict(): assert cfg.fields["age"].get_entity() == "individual" assert cfg.fields["age"].root.get_entity_and_field_path() == ("individual", ("age_numeric",)) + # catalogue mode + assert cfg.get_chart_field_ids(catalogue_mode=True) == ("age",) + def test_load_discovery_config_dict_blank(): cfg, warnings = load_discovery_config_from_dict({}) + assert cfg.charts == {} + assert cfg.catalogue_charts == [] assert cfg.overview == [] assert cfg.search == [] assert cfg.fields == {} @@ -204,6 +219,30 @@ def test_load_discovery_config(): ValidationError, "search > section Measurements [0] > lab_test_result_value [1] [type=field already seen", ), + # missing chart definition in config.charts referenced in catalogue_charts + ( + DISCOVERY_CONFIG_INVALID_6_PATH, + ValidationError, + "catalogue_charts > date_of_consent [2] [type=chart definition not found", + ), + # bad field definition in chart definition + ( + DISCOVERY_CONFIG_INVALID_7_PATH, + ValidationError, + "charts > age [type=field definition not found", + ), + # same field repeated in two different chart definitions + ( + DISCOVERY_CONFIG_INVALID_8_PATH, + ValidationError, + "charts > age2 [type=field already seen", + ), + # same chart ID repeated multiple times in catalogue_charts + ( + DISCOVERY_CONFIG_INVALID_9_PATH, + ValidationError, + "catalogue_charts > sex [2] [type=chart already seen", + ), ], ) def test_load_invalid_discovery_configs(path: Path, exc: Type[Exception], exc_str: str):