From fd85932e6b81d9b187a20265088719e848cb65ea Mon Sep 17 00:00:00 2001 From: Bernhard Reiter Date: Thu, 19 Feb 2026 16:41:38 +0100 Subject: [PATCH 1/2] Improve namespace ABNF integration * add abnf-to-regexp as requirement * split namespace_patterns.py out from patterns.py as file that can be generated fully --- pyproject.toml | 1 + src/ssvc/utils/namespace_patterns.py | 31 +++++++++++++ src/ssvc/utils/patterns.py | 53 ++++----------------- uv.lock | 69 ++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+), 45 deletions(-) create mode 100644 src/ssvc/utils/namespace_patterns.py diff --git a/pyproject.toml b/pyproject.toml index 93a556c4..fe321fd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,7 @@ testpaths = [ [dependency-groups] dev = [ + "abnf-to-regexp>=1.2.0", "black>=25.9.0", "linkchecker>=10.6.0", "pytest>=8.4.1", diff --git a/src/ssvc/utils/namespace_patterns.py b/src/ssvc/utils/namespace_patterns.py new file mode 100644 index 00000000..31ddd8d1 --- /dev/null +++ b/src/ssvc/utils/namespace_patterns.py @@ -0,0 +1,31 @@ +# --- the following section is generated with +# abnf-to-regexp --format python-nested -i ssvc_namespace_pattern.abnf | \ +# sed --expression='s/{,/{0,/g' --expression='s/\\\\#/\#/g' +alnum = '[a-zA-Z0-9]' +lower = '[a-z]' +alnumlow = f'({lower}|[0-9])' +dash = '-' +alnumlowdash = f'({alnumlow}|{dash})' +label = f'{alnumlow}(({alnumlowdash}){{0,61}}{alnumlow})?' +reverse_dns = f'{label}(\\.{label})+' +dot = '\\.' +specialchar = f'({dot}|{dash})' +fragment_seg = f'({alnumlow})+({specialchar}({alnumlow})+)*' +x_name = f'{reverse_dns}#{fragment_seg}' +x_base = f'x_{x_name}' +ns_core = f'{lower}{alnumlow}(({specialchar})?({alnumlow})+)+' +reg_base = f'{ns_core}(#{fragment_seg})?' +base_ns = f'({x_base}|{reg_base})' +singleton = '[0-9A-WY-Za-wy-z]' +bcp47 = ( + '(([a-zA-Z]{2,3}(-[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2})?|[a-z' + 'A-Z]{4,8})(-[a-zA-Z]{4})?(-([a-zA-Z]{2}|[0-9]{3}))?(-' + f'(({alnum}){{5,8}}|[0-9]({alnum}){{3}}))*(-{singleton}(-' + f'({alnum}){{2,8}})+)*(-[xX](-({alnum}){{2,8}})+)?|[xX](-' + f'({alnum}){{2,8}})+|i-default|i-mingo)' +) +translation = f'\\.({reverse_dns}|{x_name})\\${bcp47}' +ext_seg = f'({bcp47}|\\.{x_name}|{translation})' +lang_ext = f'(/|/{bcp47})' +extensions = f'{lang_ext}((/{ext_seg})+)?' +namespace = f'{base_ns}({extensions})?' diff --git a/src/ssvc/utils/patterns.py b/src/ssvc/utils/patterns.py index f24b290a..5ed2cc9f 100644 --- a/src/ssvc/utils/patterns.py +++ b/src/ssvc/utils/patterns.py @@ -2,6 +2,7 @@ """ Provides python regular expressions and utility functions for SSVC-related patterns. """ +import ssvc.utils.namespace_patterns as namespace_patterns # Copyright (c) 2025 Carnegie Mellon University. # NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE @@ -30,52 +31,14 @@ BCP_47_PATTERN = r"(([A-Za-z]{2,3}(-[A-Za-z]{3}(-[A-Za-z]{3}){0,2})?|[A-Za-z]{4,8})(-[A-Za-z]{4})?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-[A-WY-Za-wy-z0-9](-[A-Za-z0-9]{2,8})+)*(-[Xx](-[A-Za-z0-9]{1,8})+)?|[Xx](-[A-Za-z0-9]{1,8})+|[Ii]-[Dd][Ee][Ff][Aa][Uu][Ll][Tt]|[Ii]-[Mm][Ii][Nn][Gg][Oo])" """A regular expression pattern for BCP-47 language tags.""" - # --- Namespace Regex Components --- - -# --- Length constraint --- +# ---- Length constraint ---- LENGTH_CHECK_PATTERN = r"(?=.{3,1000}$)" +# ---- define base patterns to be compatible with previously existing tests +BASE_PATTERN = namespace_patterns.ns_core +BASE_NS_PATTERN = namespace_patterns.base_ns +EXT_SEGMENT_PATTERN = namespace_patterns.fragment_seg -# fmt: off -# --- the following section is generated with -# abnf-to-regexp --format python-nested -i ssvc_namespace_pattern.abnf | \ -# sed --expression='s/{,/{0,/g' --expression='s/\\\\#/\#/g' -alnum = '[a-zA-Z0-9]' -lower = '[a-z]' -alnumlow = f'({lower}|[0-9])' -dash = '-' -alnumlowdash = f'({alnumlow}|{dash})' -label = f'{alnumlow}(({alnumlowdash}){{0,61}}{alnumlow})?' -reverse_dns = f'{label}(\\.{label})+' -dot = '\\.' -specialchar = f'({dot}|{dash})' -fragment_seg = f'({alnumlow})+({specialchar}({alnumlow})+)*' -x_name = f'{reverse_dns}#{fragment_seg}' -x_base = f'x_{x_name}' -ns_core = f'{lower}{alnumlow}(({specialchar})?({alnumlow})+)+' -reg_base = f'{ns_core}(#{fragment_seg})?' -base_ns = f'({x_base}|{reg_base})' -singleton = '[0-9A-WY-Za-wy-z]' -bcp47 = ( - '(([a-zA-Z]{2,3}(-[a-zA-Z]{3}(-[a-zA-Z]{3}){0,2})?|[a-z' - 'A-Z]{4,8})(-[a-zA-Z]{4})?(-([a-zA-Z]{2}|[0-9]{3}))?(-' - f'(({alnum}){{5,8}}|[0-9]({alnum}){{3}}))*(-{singleton}(-' - f'({alnum}){{2,8}})+)*(-[xX](-({alnum}){{2,8}})+)?|[xX](-' - f'({alnum}){{2,8}})+|i-default|i-mingo)' -) -translation = f'\\.({reverse_dns}|{x_name})\\${bcp47}' -ext_seg = f'({bcp47}|\\.{x_name}|{translation})' -lang_ext = f'(/|/{bcp47})' -extensions = f'{lang_ext}((/{ext_seg})+)?' -namespace = f'{base_ns}({extensions})?' -# --- end of generated output -# fmt: on - -# --- define base patterns to be compatible with previously existing tests -BASE_PATTERN = ns_core -BASE_NS_PATTERN = base_ns -EXT_SEGMENT_PATTERN = fragment_seg - -# --- Combine all parts into the full namespace pattern --- -NS_PATTERN_STR = rf"^{namespace}$" +# ---- Combine all parts into the full namespace pattern ---- +NS_PATTERN_STR = rf"^{namespace_patterns.namespace}$" diff --git a/uv.lock b/uv.lock index 039975c6..c23b0ed7 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,33 @@ version = 1 revision = 3 requires-python = ">=3.12" +[[package]] +name = "abnf" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/f2/7b5fac50ee42e8b8d4a098d76743a394546f938c94125adbb93414e5ae7d/abnf-2.2.0.tar.gz", hash = "sha256:433380fd32855bbc60bc7b3d35d40616e21383a32ed1c9b8893d16d9f4a6c2f4", size = 197507, upload-time = "2023-03-17T18:26:24.577Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/95/f456ae7928a2f3a913f467d4fd9e662e295dd7349fc58b35f77f6c757a23/abnf-2.2.0-py3-none-any.whl", hash = "sha256:5dc2ae31a84ff454f7de46e08a2a21a442a0e21a092468420587a1590b490d1f", size = 39938, upload-time = "2023-03-17T18:26:22.608Z" }, +] + +[[package]] +name = "abnf-to-regexp" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "abnf" }, + { name = "icontract" }, + { name = "regex" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/23/6f3e11413bfac92688555c972a008f04cbc86b5e2e1a74649c919f2999a6/abnf_to_regexp-1.2.0.tar.gz", hash = "sha256:c3b51771e8015adeedfcd15857ee2a9a4d0595355b672571ab1b0f2adb535a0a", size = 25883, upload-time = "2025-11-08T18:15:10.232Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/8e/ca543f8ba7b89a632f059c19fdbcb24a640847b3603856f0cce2de45da91/abnf_to_regexp-1.2.0-py3-none-any.whl", hash = "sha256:63d330ebe080d4e68c252033449b0ea03be35847abf8fb17f52cd9afbed25b4d", size = 24393, upload-time = "2025-11-08T18:15:08.481Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -25,6 +52,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/12/e5e0282d673bb9746bacfb6e2dba8719989d3660cdb2ea79aee9a9651afb/anyio-4.10.0-py3-none-any.whl", hash = "sha256:60e474ac86736bbfd6f210f7a61218939c318f43f9972497381f1c5e930ed3d1", size = 107213, upload-time = "2025-08-04T08:54:24.882Z" }, ] +[[package]] +name = "asttokens" +version = "2.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/45/1d/f03bcb60c4a3212e15f99a56085d93093a497718adf828d050b9d675da81/asttokens-2.4.1.tar.gz", hash = "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0", size = 62284, upload-time = "2023-10-26T10:03:05.06Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/86/4736ac618d82a20d87d2f92ae19441ebc7ac9e7a581d7e58bbe79233b24a/asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24", size = 27764, upload-time = "2023-10-26T10:03:01.789Z" }, +] + [[package]] name = "attrs" version = "25.3.0" @@ -131,6 +170,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "abnf-to-regexp" }, { name = "black" }, { name = "linkchecker" }, { name = "pytest" }, @@ -161,6 +201,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "abnf-to-regexp", specifier = ">=1.2.0" }, { name = "black", specifier = ">=25.9.0" }, { name = "linkchecker", specifier = ">=10.6.0" }, { name = "pytest", specifier = ">=8.4.1" }, @@ -419,6 +460,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[[package]] +name = "icontract" +version = "2.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/41/93/8a7d5850d848cba73aa094b961459d8101b7c96837b2e8dc91083adcf7ae/icontract-2.7.3.tar.gz", hash = "sha256:df37a43d86d532407bc6b84dea29dd9f7ece794b73211769fa8a33a76b8ed145", size = 68361, upload-time = "2026-01-29T13:11:14.724Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/aa/ffc16b7ed17e052e7d2140e66336f829e9383f61f31249c4d181e12fcdea/icontract-2.7.3-py3-none-any.whl", hash = "sha256:8fb0f93f71211416f214f37c01b8017fdc8c079d70159f49f0ddcc10918c14d0", size = 40983, upload-time = "2026-01-29T13:11:13.198Z" }, +] + [[package]] name = "idna" version = "3.10" @@ -1317,6 +1371,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775, upload-time = "2025-01-25T08:48:14.241Z" }, ] +[[package]] +name = "regex" +version = "2021.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/38/3f/4c42a98c9ad7d08c16e7d23b2194a0e4f3b2914662da8bc88986e4e6de1f/regex-2021.4.4.tar.gz", hash = "sha256:52ba3d3f9b942c49d7e4bc105bb28551c44065f139a65062ab7912bef10c9afb", size = 693187, upload-time = "2021-04-04T16:50:49.77Z" } + [[package]] name = "requests" version = "2.32.5" @@ -1642,6 +1702,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + [[package]] name = "soupsieve" version = "2.8" From d072c7beae7ff64adec5edcca11d027dabdff39f Mon Sep 17 00:00:00 2001 From: Bernhard Reiter Date: Thu, 19 Feb 2026 17:10:49 +0100 Subject: [PATCH 2/2] improve ABNFpattern integration * add Makefile rule * add autogeneration warning to namespace_patterns.py resolve #899 --- Makefile | 23 ++++++++++++++--------- src/ssvc/utils/namespace_patterns.py | 4 +--- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index deaab0a4..a9079fb0 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,12 @@ UV_RUN=uv run all: help -dev: + +src/ssvc/utils/namespace_patterns.py:src/ssvc/utils/ssvc_namespace_pattern.abnf + echo "# AUTOGENERATED, DO NOT CHANGE manually, see Makefile" >$@ + uv run abnf-to-regexp --format python-nested -i $< >>$@ + +dev: src/ssvc/utils/namespace_patterns.py @echo "Set up dev environment..." uv sync --dev @@ -18,32 +23,32 @@ mdlint_fix: @echo "Running markdownlint..." markdownlint --config .markdownlint.yml --fix . -test: +test: src/ssvc/utils/namespace_patterns.py @echo "Running tests locally..." $(UV_RUN) pytest -v -docker_test: +docker_test: src/ssvc/utils/namespace_patterns.py @echo "Building the latest test image..." $(DOCKER_COMPOSE) build test @echo "Running tests in Docker..." $(DOCKER_COMPOSE) run --rm test -docs_local: +docs_local: src/ssvc/utils/namespace_patterns.py @echo "Building and running docs locally..." $(UV_RUN) mkdocs serve -docs: +docs: src/ssvc/utils/namespace_patterns.py @echo "Building and running docs in Docker..." $(DOCKER_COMPOSE) up docs -api: +api: src/ssvc/utils/namespace_patterns.py @echo "Building and running API in Docker..." $(DOCKER_COMPOSE) up api -api_dev: +api_dev: src/ssvc/utils/namespace_patterns.py $(UV_RUN) uvicorn ssvc.api.main:app --reload -up: +up: src/ssvc/utils/namespace_patterns.py @echo "Starting Docker services..." $(DOCKER_COMPOSE) up -d @@ -51,7 +56,7 @@ down: @echo "Stopping Docker services..." $(DOCKER_COMPOSE) down -regenerate_json: +regenerate_json: src/ssvc/utils/namespace_patterns.py @echo "Regenerating JSON files..." rm -rf data/json/decision_points export PYTHONPATH=$(PWD)/src && ./src/ssvc/doctools.py --datadir=./data --overwrite diff --git a/src/ssvc/utils/namespace_patterns.py b/src/ssvc/utils/namespace_patterns.py index 31ddd8d1..fd9d9233 100644 --- a/src/ssvc/utils/namespace_patterns.py +++ b/src/ssvc/utils/namespace_patterns.py @@ -1,6 +1,4 @@ -# --- the following section is generated with -# abnf-to-regexp --format python-nested -i ssvc_namespace_pattern.abnf | \ -# sed --expression='s/{,/{0,/g' --expression='s/\\\\#/\#/g' +# AUTOGENERATED, DO NOT CHANGE manually, see Makefile alnum = '[a-zA-Z0-9]' lower = '[a-z]' alnumlow = f'({lower}|[0-9])'