diff --git a/checksit/check.py b/checksit/check.py index faf65eb..cda9954 100644 --- a/checksit/check.py +++ b/checksit/check.py @@ -481,6 +481,7 @@ def _get_ncas_specs( version_number = f"{version_number}.0" template = "off" spec_names = [ + "file-name", "coordinate-variables", "dimensions", "global-attrs", diff --git a/checksit/data/specs/groups/ncas-amof-1.0.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-1.0.0/amof-file-name.yml index 5b49889..48b279c 100644 --- a/checksit/data/specs/groups/ncas-amof-1.0.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-1.0.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/1.0.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-general-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform diff --git a/checksit/data/specs/groups/ncas-amof-1.1.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-1.1.0/amof-file-name.yml index 53915f1..e186a9d 100644 --- a/checksit/data/specs/groups/ncas-amof-1.1.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-1.1.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/1.1.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-general-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform diff --git a/checksit/data/specs/groups/ncas-amof-2.0.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-2.0.0/amof-file-name.yml index c392120..793003f 100644 --- a/checksit/data/specs/groups/ncas-amof-2.0.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-2.0.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/2.0.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-general-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform diff --git a/checksit/data/specs/groups/ncas-amof-2.1.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-2.1.0/amof-file-name.yml index 223ae69..146b4ec 100644 --- a/checksit/data/specs/groups/ncas-amof-2.1.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-2.1.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/2.1.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-general-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform diff --git a/checksit/data/specs/groups/ncas-amof-2.2.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-2.2.0/amof-file-name.yml index 4bf2c74..62b45d5 100644 --- a/checksit/data/specs/groups/ncas-amof-2.2.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-2.2.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/2.2.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-radar-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform diff --git a/checksit/data/specs/groups/ncas-radar-1.0.0/file-name.yml b/checksit/data/specs/groups/ncas-radar-1.0.0/file-name.yml new file mode 100644 index 0000000..d9a479b --- /dev/null +++ b/checksit/data/specs/groups/ncas-radar-1.0.0/file-name.yml @@ -0,0 +1,9 @@ +file-name-format: + func: checksit.generic.check_file_name + params: + vocab_checks: + instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ + rule_checks: + data_product: rule-func:match-one-of:birdbath|vol|rhi|ppi|radar-velocity + platform: rule-func:ceda-platform||rule-func-warning:ncas-platform + file_version: regex-rule:ncas-radar-file-version diff --git a/checksit/generic.py b/checksit/generic.py index 3753558..acfe095 100644 --- a/checksit/generic.py +++ b/checksit/generic.py @@ -737,16 +737,39 @@ def check_file_name( errors.append( f"[file name]: Invalid file name format - unknown data product '{file_name_parts[3]}'" ) + elif "data_product" in rule_checks.keys(): + dp_rules_check = rules.check( + rule_checks["data_product"], + file_name_parts[3], + label="[file name]: Invalid file name format -", + ) + if dp_rules_check != ([], []): + rule_errors, rule_warnings = dp_rules_check + if rule_errors != []: + errors.extend(rule_errors) + if rule_warnings != []: + warnings.extend(rule_warnings) else: msg = "No data product vocab defined in specs" raise KeyError(msg) # check version number format version_component = file_name_parts[-1].split(".nc")[0] - if not re.match(r"^v\d.\d$", version_component): - errors.append( - f"[file name]: Invalid file name format - incorrect file version number '{version_component}'" + if "file_version" in rule_checks.keys(): + file_version_check = rules.check( + rule_checks["file_version"], + version_component, + label="[file name]: Invalid file name format -", ) + if file_version_check != ([], []): + rule_errors, rule_warnings = file_version_check + if rule_errors != []: + errors.extend(rule_errors) + if rule_warnings != []: + warnings.extend(rule_warnings) + else: + msg = "No file version rule defined in specs" + raise KeyError(msg) # check number of options - max length of splitted file name if len(file_name_parts) > 8: diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py index ed403d7..dc9148f 100644 --- a/checksit/rules/rules.py +++ b/checksit/rules/rules.py @@ -118,6 +118,14 @@ def __init__(self): "regex-rule": r"[^@\s]+@ncas.ac.uk", "example": "sam.jones@ncas.ac.uk", }, + "ncas-general-file-version": { + "regex-rule": r"v[0-9]+(\.[0-9]+)", + "example": "v1.0", + }, + "ncas-radar-file-version": { + "regex-rule": r"v[0-9]+(\.[0-9]+){2,}", + "example": "v1.0.0", + }, } def _map_type_rule(self, type_rule: str) -> type: diff --git a/docs/source/dev/where_does_checksit_do_it.rst b/docs/source/dev/where_does_checksit_do_it.rst index f014590..d9071f7 100644 --- a/docs/source/dev/where_does_checksit_do_it.rst +++ b/docs/source/dev/where_does_checksit_do_it.rst @@ -144,6 +144,10 @@ checks, managed by the ``Rules`` class in ``checksit/rules/rules.py``. There are - ``r"-?\d+(\.\d+)?\sm"`` * - "ncas-email" - ``r"[^@\s]+@ncas.ac.uk"`` + * - "ncas-general-file-version" + - ``r"v[0-9]+(\.[0-9]+)"`` + * - "ncas-radar-file-version" + - ``r"v[0-9]+(\.[0-9]+){2,}"`` where ``NOT_APPLICABLE_RULES`` cover phrases such as "Not Available", "Not applicable", "N/A" and diff --git a/tests/test_generic.py b/tests/test_generic.py index efe0ebf..623ea3a 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -411,7 +411,8 @@ def test_check_file_name(): "data_product": "__vocabs__:tests/test_products:test_products" } rule_checks = { - "platform": "rule-func:match-one-of:plat1|plat2" + "platform": "rule-func:match-one-of:plat1|plat2", + "file_version": r"regex:^v[0-9]+(\.[0-9]+)$", } file_name = "inst3_plat1_20220101_prod1_v1.0.nc" errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks) @@ -445,7 +446,7 @@ def test_check_file_name(): # Test that the function correctly identifies invalid version number format file_name = "inst1_plat1_20220101_prod1_v10.nc" errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks) - assert errors == ["[file name]: Invalid file name format - incorrect file version number 'v10'"] + assert errors == ["[file name]: Invalid file name format - Value 'v10' does not match regular expression: '^v[0-9]+(\\.[0-9]+)$'."] assert warnings == [] # Test that the function correctly identifies too many options in file name