diff --git a/README.md b/README.md index ad713f0..ac521f2 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,84 @@ ORDER BY sort_path; +#### Befüllen einer Datenbank mit MIG-Informationen +Analog zu den AHBs lassen sich auch MIGs in eine Datenbank überführen und "flach" ziehen. +Da MIGs die vollständige Nachrichtenstruktur beschreiben (Segmentgruppen, Segmente, Datenelementgruppen, Datenelemente und Codes), ist die Hierarchie oft tiefer als bei AHBs. + +```python +# pip install fundamend[sqlmodels] +from pathlib import Path +from fundamend.sqlmodels import create_db_and_populate_with_mig_view, MigHierarchyMaterialized +from sqlmodel import Session, create_engine, select + +mig_paths = [ + Path("UTILTS_MIG_1.1c_Lesefassung_2023_12_12.xml"), + # weitere MIG XML-Dateien hier hinzufügen +] +sqlite_file = create_db_and_populate_with_mig_view(mig_paths) +engine = create_engine(f"sqlite:///{sqlite_file}") +with Session(bind=engine) as session: + stmt = select(MigHierarchyMaterialized).where(MigHierarchyMaterialized.format == "UTILTS").order_by( + MigHierarchyMaterialized.sort_path + ) + results = session.exec(stmt).all() +``` +oder in plain SQL: +```sql +-- sqlite dialect +SELECT path, + type, + segmentgroup_id, + segment_id, + segment_name, + dataelement_id, + dataelement_name, + code_value, + code_name, + line_status_std, + line_status_specification +FROM mig_hierarchy_materialized +WHERE format = 'UTILTS' +ORDER BY sort_path; +``` + +
+Finde heraus, welche Zeilen in einem MIG zwischen zwei Versionen hinzugefügt, gelöscht oder geändert wurden +
+ +Dafür gibt es die View `v_mig_diff`, die mit `create_mig_diff_view(session)` erstellt werden kann: +```python +from fundamend.sqlmodels import create_mig_diff_view +create_mig_diff_view(session) +``` + +Die View erwartet 4 Filter-Parameter beim Abfragen und liefert einen `diff_status`: +- `added`: Zeile existiert in der neuen Version, aber nicht in der alten +- `deleted`: Zeile existiert in der alten Version, aber nicht in der neuen +- `modified`: Zeile existiert in beiden Versionen, aber mit unterschiedlichen Werten (bei `modified` enthält `changed_columns` die Liste der geänderten Spalten) +- `unchanged`: Zeile ist in beiden Versionen identisch + +Alle Wert-Spalten existieren doppelt (`old_*` und `new_*`), um die Werte aus beiden Versionen nebeneinander anzuzeigen. + +**Matching-Strategie:** Diese View matched Zeilen anhand ihrer `id_path`-Spalte, die semantische Qualifier verwendet (z.B. `SG2>SG3>FTX+ACD>C_C107>D_4441>`), um Zeilen über Versionen hinweg zu identifizieren. Das ist konsistent mit der AHB-Diff-View. + +```sql +-- Alle Änderungen zwischen zwei MIG-Versionen anzeigen +SELECT path, diff_status, changed_columns, + old_line_status_std, new_line_status_std, + old_line_status_specification, new_line_status_specification, + old_line_name, new_line_name +FROM v_mig_diff +WHERE old_format_version = 'FV2504' + AND new_format_version = 'FV2510' + AND old_format = 'IFTSTA' + AND new_format = 'IFTSTA' + AND diff_status != 'unchanged' +ORDER BY sort_path; +``` + +
+ ### CLI Tool für XML➡️JSON Konvertierung Mit ```bash diff --git a/domain-specific-terms.txt b/domain-specific-terms.txt index 4943aed..d532406 100644 --- a/domain-specific-terms.txt +++ b/domain-specific-terms.txt @@ -18,3 +18,5 @@ rekursive finde contrl Elemente +segmente +hierarchie diff --git a/src/fundamend/sqlmodels/__init__.py b/src/fundamend/sqlmodels/__init__.py index be8f25a..69f59a6 100644 --- a/src/fundamend/sqlmodels/__init__.py +++ b/src/fundamend/sqlmodels/__init__.py @@ -37,6 +37,8 @@ MigSegmentGroup, MigSegmentGroupLink, ) +from .mig_diff_view import MigDiffLine, create_mig_diff_view +from .migview import MigHierarchyMaterialized, create_db_and_populate_with_mig_view, create_mig_view __all__ = [ "create_ahb_view", @@ -62,4 +64,9 @@ "MigSegment", "MigSegmentGroup", "MigSegmentGroupLink", + "create_mig_view", + "MigHierarchyMaterialized", + "create_db_and_populate_with_mig_view", + "create_mig_diff_view", + "MigDiffLine", ] diff --git a/src/fundamend/sqlmodels/create_mig_diff_view.sql b/src/fundamend/sqlmodels/create_mig_diff_view.sql new file mode 100644 index 0000000..3b8ae78 --- /dev/null +++ b/src/fundamend/sqlmodels/create_mig_diff_view.sql @@ -0,0 +1,185 @@ +-- Assume that materialize_mig_view.sql has been executed already. +-- This view allows comparing two MIG versions to find added, deleted, and modified rows. +-- +-- IMPORTANT: This view produces a cross-product of all version pairs. You MUST filter by version and format. +-- +-- Usage for comparing FV2410 -> FV2504 for UTILTS format: +-- SELECT * FROM v_mig_diff +-- WHERE old_format_version = 'FV2410' +-- AND old_format = 'UTILTS' +-- AND new_format_version = 'FV2504' +-- AND new_format = 'UTILTS' +-- ORDER BY sort_path; +-- +-- diff_status can be: 'added', 'deleted', 'modified', 'unchanged' +-- The view compares line_status_std, line_status_specification, and line_name to determine modifications. +-- +-- For deleted rows, old_ columns are populated and new_ columns are NULL. 
+-- For added rows, new_ columns are populated and old_ columns are NULL. In both cases the format and format_version columns of the missing side are still filled from the version pair, so the filters shown above keep matching added and deleted rows. +-- +-- MATCHING STRATEGY: +-- This view matches rows by their id_path column, which uses semantic qualifiers +-- (e.g., "SG2>SG3>FTX+ACD>C_C107>D_4441>") to identify rows across versions. +-- This is consistent with how the AHB diff view works. +-- Note: SQLite's "IS NOT" is a NULL-safe inequality operator (equivalent to SQL standard "IS DISTINCT FROM") + +DROP TABLE IF EXISTS v_mig_diff; +DROP VIEW IF EXISTS v_mig_diff; + +CREATE VIEW v_mig_diff AS +WITH version_pairs AS (SELECT DISTINCT old_v.edifact_format_version AS old_format_version, + old_v.format AS old_format, + new_v.edifact_format_version AS new_format_version, + new_v.format AS new_format + FROM (SELECT DISTINCT edifact_format_version, format FROM mig_hierarchy_materialized) old_v + JOIN (SELECT DISTINCT edifact_format_version, format + FROM mig_hierarchy_materialized) new_v + ON old_v.format = new_v.format + WHERE old_v.edifact_format_version < new_v.edifact_format_version), + +-- Pre-compute changed_columns once, derive diff_status from it + modified_check AS (SELECT TRIM( + CASE + WHEN old_tbl.line_status_std IS NOT new_tbl.line_status_std + THEN 'line_status_std, ' + ELSE '' END || + CASE + WHEN old_tbl.line_status_specification IS NOT new_tbl.line_status_specification + THEN 'line_status_specification, ' + ELSE '' END || + CASE + WHEN old_tbl.line_name IS NOT new_tbl.line_name + THEN 'line_name' + ELSE '' END + , ', ') AS changed_columns, + new_tbl.id_path AS id_path, + new_tbl.sort_path AS sort_path, + new_tbl.path AS path, + new_tbl.type AS line_type, + old_tbl.edifact_format_version AS old_format_version, + old_tbl.format AS old_format, + old_tbl.segmentgroup_id AS old_segmentgroup_id, + old_tbl.segment_id AS old_segment_id, + old_tbl.dataelement_id AS old_dataelement_id, + old_tbl.code_value AS old_code_value, + old_tbl.line_status_std AS old_line_status_std, + old_tbl.line_status_specification AS 
old_line_status_specification, + old_tbl.line_name AS old_line_name, + new_tbl.edifact_format_version AS new_format_version, + new_tbl.format AS new_format, + new_tbl.segmentgroup_id AS new_segmentgroup_id, + new_tbl.segment_id AS new_segment_id, + new_tbl.dataelement_id AS new_dataelement_id, + new_tbl.code_value AS new_code_value, + new_tbl.line_status_std AS new_line_status_std, + new_tbl.line_status_specification AS new_line_status_specification, + new_tbl.line_name AS new_line_name + FROM version_pairs vp + JOIN mig_hierarchy_materialized new_tbl + ON new_tbl.edifact_format_version = vp.new_format_version + AND new_tbl.format = vp.new_format + JOIN mig_hierarchy_materialized old_tbl + ON old_tbl.edifact_format_version = vp.old_format_version + AND old_tbl.format = vp.old_format + AND old_tbl.id_path = new_tbl.id_path) + +-- Modified and unchanged rows +SELECT CASE WHEN changed_columns != '' THEN 'modified' ELSE 'unchanged' END AS diff_status, + NULLIF(changed_columns, '') AS changed_columns, + id_path, + sort_path, + path, + line_type, + old_format_version, + old_format, + old_segmentgroup_id, + old_segment_id, + old_dataelement_id, + old_code_value, + old_line_status_std, + old_line_status_specification, + old_line_name, + new_format_version, + new_format, + new_segmentgroup_id, + new_segment_id, + new_dataelement_id, + new_code_value, + new_line_status_std, + new_line_status_specification, + new_line_name +FROM modified_check + +UNION ALL + +-- Added rows (exist in new but not in old for the specific version pair) +SELECT 'added' AS diff_status, + NULL AS changed_columns, + new_tbl.id_path, + new_tbl.sort_path, + new_tbl.path, + new_tbl.type AS line_type, + vp.old_format_version AS old_format_version, + vp.old_format AS old_format, + NULL AS old_segmentgroup_id, + NULL AS old_segment_id, + NULL AS old_dataelement_id, + NULL AS old_code_value, + NULL AS old_line_status_std, + NULL AS old_line_status_specification, + NULL AS old_line_name, + 
new_tbl.edifact_format_version AS new_format_version, + new_tbl.format AS new_format, + new_tbl.segmentgroup_id AS new_segmentgroup_id, + new_tbl.segment_id AS new_segment_id, + new_tbl.dataelement_id AS new_dataelement_id, + new_tbl.code_value AS new_code_value, + new_tbl.line_status_std AS new_line_status_std, + new_tbl.line_status_specification AS new_line_status_specification, + new_tbl.line_name AS new_line_name +FROM version_pairs vp + JOIN mig_hierarchy_materialized new_tbl + ON new_tbl.edifact_format_version = vp.new_format_version + AND new_tbl.format = vp.new_format +WHERE NOT EXISTS (SELECT 1 + FROM mig_hierarchy_materialized old_tbl + WHERE old_tbl.edifact_format_version = vp.old_format_version + AND old_tbl.format = vp.old_format + AND old_tbl.id_path = new_tbl.id_path) + +UNION ALL + +-- Deleted rows (exist in old but not in new for the specific version pair) +SELECT 'deleted' AS diff_status, + NULL AS changed_columns, + old_tbl.id_path, + old_tbl.sort_path, + old_tbl.path, + old_tbl.type AS line_type, + old_tbl.edifact_format_version AS old_format_version, + old_tbl.format AS old_format, + old_tbl.segmentgroup_id AS old_segmentgroup_id, + old_tbl.segment_id AS old_segment_id, + old_tbl.dataelement_id AS old_dataelement_id, + old_tbl.code_value AS old_code_value, + old_tbl.line_status_std AS old_line_status_std, + old_tbl.line_status_specification AS old_line_status_specification, + old_tbl.line_name AS old_line_name, + vp.new_format_version AS new_format_version, + vp.new_format AS new_format, + NULL AS new_segmentgroup_id, + NULL AS new_segment_id, + NULL AS new_dataelement_id, + NULL AS new_code_value, + NULL AS new_line_status_std, + NULL AS new_line_status_specification, + NULL AS new_line_name +FROM version_pairs vp + JOIN mig_hierarchy_materialized old_tbl + ON old_tbl.edifact_format_version = vp.old_format_version + AND old_tbl.format = vp.old_format +WHERE NOT EXISTS (SELECT 1 + FROM mig_hierarchy_materialized new_tbl + WHERE 
new_tbl.edifact_format_version = vp.new_format_version + AND new_tbl.format = vp.new_format + AND new_tbl.id_path = old_tbl.id_path); diff --git a/src/fundamend/sqlmodels/materialize_mig_view.sql b/src/fundamend/sqlmodels/materialize_mig_view.sql new file mode 100644 index 0000000..bcb3b2b --- /dev/null +++ b/src/fundamend/sqlmodels/materialize_mig_view.sql @@ -0,0 +1,851 @@ +-- This SQLite script materializes the hierarchy of the MIG (Message Implementation Guide) into a table. +-- This allows for easy querying without 'unrolling' the recursive segment (group) hierarchy each time. +-- There is a Pydantic model class for the 'mig_hierarchy_materialized' table: MigHierarchyMaterialized + +-- Drop previous materialized table if it exists +DROP TABLE IF EXISTS mig_hierarchy_materialized; + +-- ============================================================================ +-- Pre-compute semantic qualifiers for segments, segment groups, and data elements. +-- Same approach as materialize_ahb_view.sql — see that file for detailed comments. 
+-- ============================================================================ + +DROP TABLE IF EXISTS _seg_qual; +CREATE TEMP TABLE _seg_qual AS +SELECT s.primary_key AS pk, + COALESCE( + (SELECT c.value FROM migdataelement de JOIN migcode c ON c.data_element_primary_key = de.primary_key + WHERE de.segment_primary_key = s.primary_key AND de.data_element_group_primary_key IS NULL + ORDER BY de.position, c.position LIMIT 1), + (SELECT c.value FROM migdataelementgroup deg + JOIN migdataelement de ON de.data_element_group_primary_key = deg.primary_key + JOIN migcode c ON c.data_element_primary_key = de.primary_key + WHERE deg.segment_primary_key = s.primary_key + ORDER BY deg.position, de.position, c.position LIMIT 1) + ) AS qualifier +FROM migsegment s; +CREATE INDEX _idx_seg_qual ON _seg_qual(pk); + +DROP TABLE IF EXISTS _seg_needs_qual; +CREATE TEMP TABLE _seg_needs_qual AS +SELECT s1.primary_key AS pk FROM migsegment s1 +WHERE s1.segmentgroup_primary_key IS NOT NULL + AND EXISTS (SELECT 1 FROM migsegment s2 + WHERE s2.segmentgroup_primary_key = s1.segmentgroup_primary_key + AND s2.id = s1.id AND s2.primary_key != s1.primary_key) +UNION ALL +SELECT s1.primary_key FROM migsegment s1 +WHERE s1.segmentgroup_primary_key IS NULL + AND EXISTS (SELECT 1 FROM migsegment s2 + WHERE s2.segmentgroup_primary_key IS NULL + AND s2.mig_primary_key = s1.mig_primary_key + AND s2.id = s1.id AND s2.primary_key != s1.primary_key); +CREATE INDEX _idx_seg_needs_qual ON _seg_needs_qual(pk); + +DROP TABLE IF EXISTS _sg_qual; +CREATE TEMP TABLE _sg_qual AS +WITH RECURSIVE sg_qual_cte AS ( + SELECT s.segmentgroup_primary_key AS sg_pk, + (SELECT sq.qualifier FROM _seg_qual sq + JOIN migsegment s2 ON sq.pk = s2.primary_key + WHERE s2.segmentgroup_primary_key = s.segmentgroup_primary_key + ORDER BY s2.position LIMIT 1) AS qualifier + FROM migsegment s + WHERE s.segmentgroup_primary_key IS NOT NULL + GROUP BY s.segmentgroup_primary_key + UNION + SELECT link.parent_id AS sg_pk, 
child_q.qualifier + FROM migsegmentgrouplink link + JOIN sg_qual_cte child_q ON child_q.sg_pk = link.child_id + WHERE NOT EXISTS (SELECT 1 FROM migsegment s WHERE s.segmentgroup_primary_key = link.parent_id) +) +SELECT sg_pk AS pk, MIN(qualifier) AS qualifier FROM sg_qual_cte GROUP BY sg_pk HAVING qualifier IS NOT NULL; +CREATE INDEX _idx_sg_qual ON _sg_qual(pk); + +DROP TABLE IF EXISTS _sg_needs_qual; +CREATE TEMP TABLE _sg_needs_qual AS +SELECT child1.primary_key AS pk +FROM migsegmentgrouplink link1 +JOIN migsegmentgroup child1 ON link1.child_id = child1.primary_key +WHERE EXISTS (SELECT 1 FROM migsegmentgrouplink link2 + JOIN migsegmentgroup child2 ON link2.child_id = child2.primary_key + WHERE link2.parent_id = link1.parent_id + AND child2.id = child1.id AND child2.primary_key != child1.primary_key) +UNION ALL +SELECT sg1.primary_key FROM migsegmentgroup sg1 +WHERE NOT EXISTS (SELECT 1 FROM migsegmentgrouplink link WHERE link.child_id = sg1.primary_key) + AND EXISTS (SELECT 1 FROM migsegmentgroup sg2 + WHERE sg2.mig_primary_key = sg1.mig_primary_key + AND sg2.id = sg1.id AND sg2.primary_key != sg1.primary_key + AND NOT EXISTS (SELECT 1 FROM migsegmentgrouplink link WHERE link.child_id = sg2.primary_key)); +CREATE INDEX _idx_sg_needs_qual ON _sg_needs_qual(pk); + +DROP TABLE IF EXISTS _de_qual; +CREATE TEMP TABLE _de_qual AS +SELECT de.primary_key AS pk, + (SELECT c.value FROM migcode c WHERE c.data_element_primary_key = de.primary_key + ORDER BY c.position LIMIT 1) AS qualifier +FROM migdataelement de; +CREATE INDEX _idx_de_qual ON _de_qual(pk); + +DROP TABLE IF EXISTS _de_needs_qual; +CREATE TEMP TABLE _de_needs_qual AS +SELECT de1.primary_key AS pk FROM migdataelement de1 +WHERE de1.data_element_group_primary_key IS NOT NULL + AND EXISTS (SELECT 1 FROM migdataelement de2 + WHERE de2.data_element_group_primary_key = de1.data_element_group_primary_key + AND de2.id = de1.id AND de2.primary_key != de1.primary_key) +UNION ALL +SELECT de1.primary_key FROM 
migdataelement de1 +WHERE de1.data_element_group_primary_key IS NULL + AND de1.segment_primary_key IS NOT NULL + AND EXISTS (SELECT 1 FROM migdataelement de2 + WHERE de2.segment_primary_key = de1.segment_primary_key + AND de2.data_element_group_primary_key IS NULL + AND de2.id = de1.id AND de2.primary_key != de1.primary_key); +CREATE INDEX _idx_de_needs_qual ON _de_needs_qual(pk); + +-- ============================================================================ +-- Materialize hierarchy for ALL MIGs +-- ============================================================================ +CREATE TABLE mig_hierarchy_materialized AS +WITH RECURSIVE + + ordered_roots AS (SELECT sg.primary_key, + sg.position, + 'segment_group' AS type, + sg.id AS root_id_text, + sg.name, + sg.status_std, + sg.status_specification, + sg.counter, + sg.level, + sg.max_rep_std, + sg.max_rep_specification, + sg.mig_primary_key, + NULL AS number, + NULL AS example, + NULL AS description, + mig.format, + mig.versionsnummer, + mig.gueltig_von, + mig.gueltig_bis, + mig.edifact_format_version, + NULL AS is_on_uebertragungsdatei_level + FROM migsegmentgroup sg + JOIN messageimplementationguide mig ON sg.mig_primary_key = mig.primary_key + WHERE sg.mig_primary_key IS NOT NULL + + UNION ALL + + SELECT s.primary_key, + s.position, + 'segment' AS type, + s.id AS root_id_text, + s.name, + s.status_std, + s.status_specification, + s.counter, + s.level, + s.max_rep_std, + s.max_rep_specification, + s.mig_primary_key, + s.number, + s.example, + s.description, + mig.format, + mig.versionsnummer, + mig.gueltig_von, + mig.gueltig_bis, + mig.edifact_format_version, + s.is_on_uebertragungsdatei_level + FROM migsegment s + JOIN messageimplementationguide mig ON s.mig_primary_key = mig.primary_key + WHERE s.segmentgroup_primary_key IS NULL + AND s.mig_primary_key IS NOT NULL), + + ordered_roots_with_order AS (SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY mig_primary_key + ORDER BY position + ) AS root_order + FROM 
ordered_roots), + + root_hierarchy AS (SELECT o.mig_primary_key AS mig_pk, + o.primary_key AS current_id, + o.primary_key AS root_id, + NULL AS parent_id, + 0 AS depth, + o.position, + o.name AS path, + o.name AS parent_path, + o.root_order, + o.type, + o.primary_key AS source_id, + substr('00000' || o.position, -5) || '-' AS sort_path, + o.root_id_text || CASE + WHEN o.type = 'segment_group' + AND EXISTS (SELECT 1 FROM _sg_needs_qual sgnq WHERE sgnq.pk = o.primary_key) + THEN COALESCE('+' || (SELECT sgq.qualifier FROM _sg_qual sgq WHERE sgq.pk = o.primary_key), '') + WHEN o.type = 'segment' + AND EXISTS (SELECT 1 FROM _seg_needs_qual snq WHERE snq.pk = o.primary_key) + THEN COALESCE('+' || (SELECT sq.qualifier FROM _seg_qual sq WHERE sq.pk = o.primary_key), '') + ELSE '' + END || '>' AS id_path, + o.format, + o.versionsnummer, + o.gueltig_von, + o.gueltig_bis, + o.edifact_format_version, + o.is_on_uebertragungsdatei_level, + + -- Segment Group fields + CASE WHEN o.type = 'segment_group' THEN o.root_id_text ELSE NULL END AS segmentgroup_id, + CASE WHEN o.type = 'segment_group' THEN o.name ELSE NULL END AS segmentgroup_name, + CASE WHEN o.type = 'segment_group' THEN o.status_std ELSE NULL END AS segmentgroup_status_std, + CASE + WHEN o.type = 'segment_group' THEN o.status_specification + ELSE NULL END AS segmentgroup_status_specification, + CASE WHEN o.type = 'segment_group' THEN o.counter ELSE NULL END AS segmentgroup_counter, + CASE WHEN o.type = 'segment_group' THEN o.level ELSE NULL END AS segmentgroup_level, + CASE WHEN o.type = 'segment_group' THEN o.max_rep_std ELSE NULL END AS segmentgroup_max_rep_std, + CASE + WHEN o.type = 'segment_group' THEN o.max_rep_specification + ELSE NULL END AS segmentgroup_max_rep_specification, + CASE WHEN o.type = 'segment_group' THEN o.position ELSE NULL END AS segmentgroup_position, + + -- Segment fields + CASE WHEN o.type = 'segment' THEN o.root_id_text ELSE NULL END AS segment_id, + CASE WHEN o.type = 'segment' THEN o.name 
ELSE NULL END AS segment_name, + CASE WHEN o.type = 'segment' THEN o.status_std ELSE NULL END AS segment_status_std, + CASE + WHEN o.type = 'segment' THEN o.status_specification + ELSE NULL END AS segment_status_specification, + CASE WHEN o.type = 'segment' THEN o.counter ELSE NULL END AS segment_counter, + CASE WHEN o.type = 'segment' THEN o.level ELSE NULL END AS segment_level, + CASE WHEN o.type = 'segment' THEN o.number ELSE NULL END AS segment_number, + CASE WHEN o.type = 'segment' THEN o.max_rep_std ELSE NULL END AS segment_max_rep_std, + CASE + WHEN o.type = 'segment' THEN o.max_rep_specification + ELSE NULL END AS segment_max_rep_specification, + CASE WHEN o.type = 'segment' THEN o.example ELSE NULL END AS segment_example, + CASE WHEN o.type = 'segment' THEN o.description ELSE NULL END AS segment_description, + CASE WHEN o.type = 'segment' THEN o.position ELSE NULL END AS segment_position, + + -- Data Element Group fields (NULL at root level) + NULL AS dataelementgroup_id, + NULL AS dataelementgroup_name, + NULL AS dataelementgroup_description, + NULL AS dataelementgroup_status_std, + NULL AS dataelementgroup_status_specification, + NULL AS dataelementgroup_position, + + -- Data Element fields (NULL at root level) + NULL AS dataelement_id, + NULL AS dataelement_name, + NULL AS dataelement_description, + NULL AS dataelement_status_std, + NULL AS dataelement_status_specification, + NULL AS dataelement_format_std, + NULL AS dataelement_format_specification, + NULL AS dataelement_position, + + -- Code fields (NULL at root level) + NULL AS code_id, + NULL AS code_name, + NULL AS code_description, + NULL AS code_value, + NULL AS code_position + FROM ordered_roots_with_order o), + + hierarchy AS (SELECT * + FROM root_hierarchy + + UNION ALL + + -- Nested segment groups (via link table) + SELECT h.mig_pk, + child.primary_key, + h.root_id, + link.parent_id, + h.depth + 1, + child.position, + h.path || ' > ' || child.name, + h.path, + h.root_order, + 'segment_group', 
+ h.source_id, + h.sort_path || substr('00000' || child.position, -5) || '-', + h.id_path || child.id || CASE + WHEN EXISTS (SELECT 1 FROM _sg_needs_qual sgnq WHERE sgnq.pk = child.primary_key) + THEN COALESCE('+' || (SELECT sgq.qualifier FROM _sg_qual sgq WHERE sgq.pk = child.primary_key), '') + ELSE '' + END || '>', + h.format, + h.versionsnummer, + h.gueltig_von, + h.gueltig_bis, + h.edifact_format_version, + h.is_on_uebertragungsdatei_level, + + child.id, + child.name, + child.status_std, + child.status_specification, + child.counter, + child.level, + child.max_rep_std, + child.max_rep_specification, + child.position, + + h.segment_id, + h.segment_name, + h.segment_status_std, + h.segment_status_specification, + h.segment_counter, + h.segment_level, + h.segment_number, + h.segment_max_rep_std, + h.segment_max_rep_specification, + h.segment_example, + h.segment_description, + h.segment_position, + + h.dataelementgroup_id, + h.dataelementgroup_name, + h.dataelementgroup_description, + h.dataelementgroup_status_std, + h.dataelementgroup_status_specification, + h.dataelementgroup_position, + + h.dataelement_id, + h.dataelement_name, + h.dataelement_description, + h.dataelement_status_std, + h.dataelement_status_specification, + h.dataelement_format_std, + h.dataelement_format_specification, + h.dataelement_position, + + h.code_id, + h.code_name, + h.code_description, + h.code_value, + h.code_position + FROM hierarchy h + JOIN migsegmentgrouplink link ON h.current_id = link.parent_id + JOIN migsegmentgroup child ON link.child_id = child.primary_key + WHERE h.type = 'segment_group' + + UNION ALL + + -- Segments within segment groups + SELECT h.mig_pk, + s.primary_key, + h.root_id, + s.segmentgroup_primary_key, + h.depth + 1, + s.position, + h.path || ' > ' || s.name, + h.path, + h.root_order, + 'segment', + h.source_id, + h.sort_path || substr('00000' || s.position, -5) || '-', + h.id_path || s.id || CASE + WHEN EXISTS (SELECT 1 FROM _seg_needs_qual snq WHERE snq.pk 
= s.primary_key) + THEN COALESCE('+' || (SELECT sq.qualifier FROM _seg_qual sq WHERE sq.pk = s.primary_key), '') + ELSE '' + END || '>', + h.format, + h.versionsnummer, + h.gueltig_von, + h.gueltig_bis, + h.edifact_format_version, + s.is_on_uebertragungsdatei_level, + + h.segmentgroup_id, + h.segmentgroup_name, + h.segmentgroup_status_std, + h.segmentgroup_status_specification, + h.segmentgroup_counter, + h.segmentgroup_level, + h.segmentgroup_max_rep_std, + h.segmentgroup_max_rep_specification, + h.segmentgroup_position, + + s.id, + s.name, + s.status_std, + s.status_specification, + s.counter, + s.level, + s.number, + s.max_rep_std, + s.max_rep_specification, + s.example, + s.description, + s.position, + + h.dataelementgroup_id, + h.dataelementgroup_name, + h.dataelementgroup_description, + h.dataelementgroup_status_std, + h.dataelementgroup_status_specification, + h.dataelementgroup_position, + + h.dataelement_id, + h.dataelement_name, + h.dataelement_description, + h.dataelement_status_std, + h.dataelement_status_specification, + h.dataelement_format_std, + h.dataelement_format_specification, + h.dataelement_position, + + h.code_id, + h.code_name, + h.code_description, + h.code_value, + h.code_position + FROM hierarchy h + JOIN migsegment s ON s.segmentgroup_primary_key = h.current_id + WHERE h.type = 'segment_group' + + UNION ALL + + -- Data element groups within segments + SELECT h.mig_pk, + deg.primary_key, + h.root_id, + deg.segment_primary_key, + h.depth + 1, + deg.position, + h.path || ' > ' || deg.name, + h.path, + h.root_order, + 'dataelementgroup', + h.source_id, + h.sort_path || substr('00000' || deg.position, -5) || '-', + h.id_path || deg.id || '>', + h.format, + h.versionsnummer, + h.gueltig_von, + h.gueltig_bis, + h.edifact_format_version, + h.is_on_uebertragungsdatei_level, + + h.segmentgroup_id, + h.segmentgroup_name, + h.segmentgroup_status_std, + h.segmentgroup_status_specification, + h.segmentgroup_counter, + h.segmentgroup_level, + 
h.segmentgroup_max_rep_std, + h.segmentgroup_max_rep_specification, + h.segmentgroup_position, + + h.segment_id, + h.segment_name, + h.segment_status_std, + h.segment_status_specification, + h.segment_counter, + h.segment_level, + h.segment_number, + h.segment_max_rep_std, + h.segment_max_rep_specification, + h.segment_example, + h.segment_description, + h.segment_position, + + deg.id, + deg.name, + deg.description, + deg.status_std, + deg.status_specification, + deg.position, + + h.dataelement_id, + h.dataelement_name, + h.dataelement_description, + h.dataelement_status_std, + h.dataelement_status_specification, + h.dataelement_format_std, + h.dataelement_format_specification, + h.dataelement_position, + + h.code_id, + h.code_name, + h.code_description, + h.code_value, + h.code_position + FROM hierarchy h + JOIN migdataelementgroup deg ON deg.segment_primary_key = h.current_id + WHERE h.type = 'segment' + + UNION ALL + + -- Data elements directly within segments (no group) + SELECT h.mig_pk, + de.primary_key, + h.root_id, + de.segment_primary_key, + h.depth + 1, + de.position, + h.path || ' > ' || de.name, + h.path, + h.root_order, + 'dataelement', + h.source_id, + h.sort_path || substr('00000' || de.position, -5) || '-', + h.id_path || de.id || CASE + WHEN EXISTS (SELECT 1 FROM _de_needs_qual dnq WHERE dnq.pk = de.primary_key) + THEN COALESCE('+' || (SELECT dq.qualifier FROM _de_qual dq WHERE dq.pk = de.primary_key), '') + ELSE '' + END || '>', + h.format, + h.versionsnummer, + h.gueltig_von, + h.gueltig_bis, + h.edifact_format_version, + h.is_on_uebertragungsdatei_level, + + h.segmentgroup_id, + h.segmentgroup_name, + h.segmentgroup_status_std, + h.segmentgroup_status_specification, + h.segmentgroup_counter, + h.segmentgroup_level, + h.segmentgroup_max_rep_std, + h.segmentgroup_max_rep_specification, + h.segmentgroup_position, + + h.segment_id, + h.segment_name, + h.segment_status_std, + h.segment_status_specification, + h.segment_counter, + h.segment_level, + 
h.segment_number, + h.segment_max_rep_std, + h.segment_max_rep_specification, + h.segment_example, + h.segment_description, + h.segment_position, + + h.dataelementgroup_id, + h.dataelementgroup_name, + h.dataelementgroup_description, + h.dataelementgroup_status_std, + h.dataelementgroup_status_specification, + h.dataelementgroup_position, + + de.id, + de.name, + de.description, + de.status_std, + de.status_specification, + de.format_std, + de.format_specification, + de.position, + + h.code_id, + h.code_name, + h.code_description, + h.code_value, + h.code_position + FROM hierarchy h + JOIN migdataelement de ON de.segment_primary_key = h.current_id + WHERE h.type = 'segment' + AND de.data_element_group_primary_key IS NULL + + UNION ALL + + -- Data elements within data element groups + SELECT h.mig_pk, + de.primary_key, + h.root_id, + de.data_element_group_primary_key, + h.depth + 1, + de.position, + h.path || ' > ' || de.name, + h.path, + h.root_order, + 'dataelement', + h.source_id, + h.sort_path || substr('00000' || de.position, -5) || '-', + h.id_path || de.id || CASE + WHEN EXISTS (SELECT 1 FROM _de_needs_qual dnq WHERE dnq.pk = de.primary_key) + THEN COALESCE('+' || (SELECT dq.qualifier FROM _de_qual dq WHERE dq.pk = de.primary_key), '') + ELSE '' + END || '>', + h.format, + h.versionsnummer, + h.gueltig_von, + h.gueltig_bis, + h.edifact_format_version, + h.is_on_uebertragungsdatei_level, + + h.segmentgroup_id, + h.segmentgroup_name, + h.segmentgroup_status_std, + h.segmentgroup_status_specification, + h.segmentgroup_counter, + h.segmentgroup_level, + h.segmentgroup_max_rep_std, + h.segmentgroup_max_rep_specification, + h.segmentgroup_position, + + h.segment_id, + h.segment_name, + h.segment_status_std, + h.segment_status_specification, + h.segment_counter, + h.segment_level, + h.segment_number, + h.segment_max_rep_std, + h.segment_max_rep_specification, + h.segment_example, + h.segment_description, + h.segment_position, + + h.dataelementgroup_id, + 
h.dataelementgroup_name, + h.dataelementgroup_description, + h.dataelementgroup_status_std, + h.dataelementgroup_status_specification, + h.dataelementgroup_position, + + de.id, + de.name, + de.description, + de.status_std, + de.status_specification, + de.format_std, + de.format_specification, + de.position, + + h.code_id, + h.code_name, + h.code_description, + h.code_value, + h.code_position + FROM hierarchy h + JOIN migdataelement de ON de.data_element_group_primary_key = h.current_id + WHERE h.type = 'dataelementgroup' + + UNION ALL + + -- Codes within data elements + SELECT h.mig_pk, + c.primary_key, + h.root_id, + c.data_element_primary_key, + h.depth + 1, + c.position, + h.path || ' > ' || c.name, + h.path, + h.root_order, + 'code', + h.source_id, + h.sort_path || substr('00000' || c.position, -5) || '-', + h.id_path || c.value || '>', + h.format, + h.versionsnummer, + h.gueltig_von, + h.gueltig_bis, + h.edifact_format_version, + h.is_on_uebertragungsdatei_level, + + h.segmentgroup_id, + h.segmentgroup_name, + h.segmentgroup_status_std, + h.segmentgroup_status_specification, + h.segmentgroup_counter, + h.segmentgroup_level, + h.segmentgroup_max_rep_std, + h.segmentgroup_max_rep_specification, + h.segmentgroup_position, + + h.segment_id, + h.segment_name, + h.segment_status_std, + h.segment_status_specification, + h.segment_counter, + h.segment_level, + h.segment_number, + h.segment_max_rep_std, + h.segment_max_rep_specification, + h.segment_example, + h.segment_description, + h.segment_position, + + h.dataelementgroup_id, + h.dataelementgroup_name, + h.dataelementgroup_description, + h.dataelementgroup_status_std, + h.dataelementgroup_status_specification, + h.dataelementgroup_position, + + h.dataelement_id, + h.dataelement_name, + h.dataelement_description, + h.dataelement_status_std, + h.dataelement_status_specification, + h.dataelement_format_std, + h.dataelement_format_specification, + h.dataelement_position, + + c.primary_key, + c.name, + c.description, + 
c.value, + c.position + FROM hierarchy h + JOIN migcode c ON c.data_element_primary_key = h.current_id + WHERE h.type = 'dataelement') + +SELECT hex(randomblob(16)) AS id, + *, + -- Computed columns for easier querying + trim( + coalesce( + code_name, + dataelement_name, + dataelementgroup_name, + segment_name, + segmentgroup_name + ) + ) AS line_name, + trim( + coalesce( + dataelement_status_std, + dataelementgroup_status_std, + segment_status_std, + segmentgroup_status_std + ) + ) AS line_status_std, + trim( + coalesce( + dataelement_status_specification, + dataelementgroup_status_specification, + segment_status_specification, + segmentgroup_status_specification + ) + ) AS line_status_specification +FROM hierarchy +ORDER BY mig_pk, sort_path; + + +-- Create indexes for efficient querying +CREATE UNIQUE INDEX idx_mig_hierarchy_id ON mig_hierarchy_materialized (id); +CREATE INDEX idx_mig_hierarchy_mig_pk ON mig_hierarchy_materialized (mig_pk); +CREATE INDEX idx_mig_hierarchy_mig_pk_sort ON mig_hierarchy_materialized (mig_pk, sort_path); +CREATE INDEX idx_mig_hierarchy_type ON mig_hierarchy_materialized (type); +CREATE INDEX idx_mig_hierarchy_format ON mig_hierarchy_materialized (format); +CREATE INDEX idx_mig_hierarchy_format_version ON mig_hierarchy_materialized (format, edifact_format_version); +CREATE INDEX idx_mig_hierarchy_versionsnummer ON mig_hierarchy_materialized (versionsnummer); +CREATE INDEX idx_mig_hierarchy_gueltig_von ON mig_hierarchy_materialized (gueltig_von); +CREATE INDEX idx_mig_hierarchy_gueltig_bis ON mig_hierarchy_materialized (gueltig_bis); +CREATE INDEX idx_mig_hierarchy_edifact_format_version ON mig_hierarchy_materialized (edifact_format_version); + +-- Segment group indexes +CREATE INDEX idx_mig_hierarchy_segmentgroup_id ON mig_hierarchy_materialized (segmentgroup_id); +CREATE INDEX idx_mig_hierarchy_segmentgroup_name ON mig_hierarchy_materialized (segmentgroup_name); +CREATE INDEX idx_mig_hierarchy_segmentgroup_position ON 
mig_hierarchy_materialized (segmentgroup_position); + +-- Segment indexes +CREATE INDEX idx_mig_hierarchy_segment_id ON mig_hierarchy_materialized (segment_id); +CREATE INDEX idx_mig_hierarchy_segment_name ON mig_hierarchy_materialized (segment_name); +CREATE INDEX idx_mig_hierarchy_segment_number ON mig_hierarchy_materialized (segment_number); +CREATE INDEX idx_mig_hierarchy_segment_position ON mig_hierarchy_materialized (segment_position); + +-- Data element group indexes +CREATE INDEX idx_mig_hierarchy_dataelementgroup_id ON mig_hierarchy_materialized (dataelementgroup_id); +CREATE INDEX idx_mig_hierarchy_dataelementgroup_name ON mig_hierarchy_materialized (dataelementgroup_name); +CREATE INDEX idx_mig_hierarchy_dataelementgroup_position ON mig_hierarchy_materialized (dataelementgroup_position); + +-- Data element indexes +CREATE INDEX idx_mig_hierarchy_dataelement_id ON mig_hierarchy_materialized (dataelement_id); +CREATE INDEX idx_mig_hierarchy_dataelement_name ON mig_hierarchy_materialized (dataelement_name); +CREATE INDEX idx_mig_hierarchy_dataelement_position ON mig_hierarchy_materialized (dataelement_position); + +-- Code indexes +CREATE INDEX idx_mig_hierarchy_code_id ON mig_hierarchy_materialized (code_id); +CREATE INDEX idx_mig_hierarchy_code_name ON mig_hierarchy_materialized (code_name); +CREATE INDEX idx_mig_hierarchy_code_value ON mig_hierarchy_materialized (code_value); +CREATE INDEX idx_mig_hierarchy_code_position ON mig_hierarchy_materialized (code_position); + +-- Path indexes +CREATE INDEX idx_mig_hierarchy_path ON mig_hierarchy_materialized (path); +CREATE INDEX idx_mig_hierarchy_id_path ON mig_hierarchy_materialized (id_path); +CREATE INDEX idx_mig_hierarchy_sort ON mig_hierarchy_materialized (sort_path); + +-- Computed column indexes +CREATE INDEX idx_mig_line_name ON mig_hierarchy_materialized (line_name); +CREATE INDEX idx_mig_line_status_std ON mig_hierarchy_materialized (line_status_std); +CREATE INDEX idx_mig_line_status_specification 
ON mig_hierarchy_materialized (line_status_specification); + +-- Fallback: append occurrence counter '#N' to any id_paths still not unique after qualifier injection. +CREATE TEMP TABLE _id_path_counter_fix AS +SELECT id, + id_path || '#' || ROW_NUMBER() OVER ( + PARTITION BY id_path, format, edifact_format_version + ORDER BY sort_path, id + ) AS new_id_path +FROM mig_hierarchy_materialized +WHERE id IN (SELECT h1.id + FROM mig_hierarchy_materialized h1 + WHERE EXISTS (SELECT 1 + FROM mig_hierarchy_materialized h2 + WHERE h2.id_path = h1.id_path + AND h2.format = h1.format + AND (h2.edifact_format_version = h1.edifact_format_version OR + (h2.edifact_format_version IS NULL AND h1.edifact_format_version IS NULL)) + AND h2.id != h1.id)); + +CREATE UNIQUE INDEX _idx_counter_fix ON _id_path_counter_fix(id); + +UPDATE mig_hierarchy_materialized +SET id_path = (SELECT cf.new_id_path FROM _id_path_counter_fix cf WHERE cf.id = mig_hierarchy_materialized.id) +WHERE id IN (SELECT id FROM _id_path_counter_fix); + +DROP TABLE _id_path_counter_fix; + +-- Clean up qualifier temp tables +DROP TABLE IF EXISTS _seg_qual; +DROP TABLE IF EXISTS _seg_needs_qual; +DROP TABLE IF EXISTS _sg_qual; +DROP TABLE IF EXISTS _sg_needs_qual; +DROP TABLE IF EXISTS _de_qual; +DROP TABLE IF EXISTS _de_needs_qual; + +-- Append counter '#N' to path where duplicates exist (for diff view matching) +CREATE TEMP TABLE _path_counter_fix AS +SELECT id, + path || ' #' || ROW_NUMBER() OVER ( + PARTITION BY path, format, edifact_format_version + ORDER BY sort_path, id + ) AS new_path +FROM mig_hierarchy_materialized +WHERE id IN (SELECT h1.id + FROM mig_hierarchy_materialized h1 + WHERE EXISTS (SELECT 1 + FROM mig_hierarchy_materialized h2 + WHERE h2.path = h1.path + AND h2.format = h1.format + AND (h2.edifact_format_version = h1.edifact_format_version OR + (h2.edifact_format_version IS NULL AND h1.edifact_format_version IS NULL)) + AND h2.id != h1.id)); + +CREATE UNIQUE INDEX _idx_path_counter_fix ON 
_logger = logging.getLogger(__name__)


def _check_mig_hierarchy_exists_and_has_data(session: Session) -> None:
    """
    Log a warning when mig_hierarchy_materialized is missing or holds no rows.

    The function only logs; it never raises for these two situations. Any
    ``OperationalError`` raised by the COUNT query is reported as "table does not exist".
    """
    count_query = sqlalchemy.text("SELECT COUNT(*) FROM mig_hierarchy_materialized")
    try:
        row_count = session.execute(count_query).scalar()
    except sqlalchemy.exc.OperationalError:
        _logger.warning(
            "mig_hierarchy_materialized does not exist. "
            "The v_mig_diff view requires mig_hierarchy_materialized to be created first. "
            "Call create_mig_view() before create_mig_diff_view()."
        )
        return
    if row_count == 0:
        _logger.warning(
            "mig_hierarchy_materialized exists but is empty. "
            "The v_mig_diff view will not return any results. "
            "Make sure to call create_mig_view() after populating the database."
        )


def create_mig_diff_view(session: Session) -> None:
    """
    Create the v_mig_diff view that compares two MIG versions.

    Expects that create_mig_view (materialize_mig_view.sql) has been run before; if the
    materialized table is missing or empty a warning is logged and the view is created anyway.
    """
    _check_mig_hierarchy_exists_and_has_data(session)
    # the SQL script lives next to this module
    diff_view_script = Path(__file__).with_name("create_mig_diff_view.sql")
    _execute_bare_sql(session=session, path_to_sql_commands=diff_view_script)
    _logger.info("Created view %s", MigDiffLine.__tablename__)


class MigDiffLine(SQLModel, table=True):
    """
    One row of the v_mig_diff view, i.e. the comparison of a single MIG line between two versions.

    The view is built on top of mig_hierarchy_materialized and compares line_status_std,
    line_status_specification and line_name.

    Always filter on all 4 parameters to compare two specific versions:

        SELECT * FROM v_mig_diff
        WHERE old_format_version = 'FV2410'
          AND new_format_version = 'FV2504'
          AND old_format = 'UTILTS'
          AND new_format = 'UTILTS'
        ORDER BY sort_path;

    diff_status is one of 'added', 'deleted', 'modified', 'unchanged'.
    Every value column comes as a pair (old_* / new_*) so both versions can be read side by side.

    MATCHING STRATEGY:
    Rows of the two versions are paired by their id_path, which is built from semantic qualifiers
    (e.g., "SG2>SG3>FTX+ACD>C_C107>D_4441>"). The AHB diff view follows the same approach.
    """

    __tablename__ = "v_mig_diff"

    # --- composite primary key: the matched id_path plus the 4 filter parameters ---
    id_path: str = Field(primary_key=True)
    old_format_version: Optional[EdifactFormatVersion] = Field(primary_key=True, default=None)
    new_format_version: Optional[EdifactFormatVersion] = Field(primary_key=True, default=None)
    old_format: Optional[EdifactFormat] = Field(primary_key=True, default=None)
    new_format: Optional[EdifactFormat] = Field(primary_key=True, default=None)

    # --- fields shared by both sides of the comparison ---
    sort_path: str = Field()
    path: str = Field()
    line_type: Optional[str] = Field(default=None)

    # --- result of the comparison ---
    # one of 'added', 'deleted', 'modified', 'unchanged'
    diff_status: str = Field()
    # names of the columns that differ; only set for 'modified' rows, NULL otherwise
    changed_columns: Optional[str] = Field(default=None)

    # --- values taken from the old version ---
    old_segmentgroup_id: Optional[str] = Field(default=None)
    old_segment_id: Optional[str] = Field(default=None)
    old_dataelement_id: Optional[str] = Field(default=None)
    old_code_value: Optional[str] = Field(default=None)
    old_line_status_std: Optional[str] = Field(default=None)
    old_line_status_specification: Optional[str] = Field(default=None)
    old_line_name: Optional[str] = Field(default=None)

    # --- values taken from the new version ---
    new_segmentgroup_id: Optional[str] = Field(default=None)
    new_segment_id: Optional[str] = Field(default=None)
    new_dataelement_id: Optional[str] = Field(default=None)
    new_code_value: Optional[str] = Field(default=None)
    new_line_status_std: Optional[str] = Field(default=None)
    new_line_status_specification: Optional[str] = Field(default=None)
    new_line_name: Optional[str] = Field(default=None)


__all__ = ["create_mig_diff_view", "MigDiffLine"]
_logger = logging.getLogger(__name__)


def create_mig_view(session: Session) -> None:
    """
    Create a materialized view for the Message Implementation Guides using a SQLAlchemy session.

    Runs the statements from ``materialize_mig_view.sql`` (located next to this module) and logs
    how many rows the resulting table contains.
    Warning: This is only tested for SQLite!
    """
    _execute_bare_sql(session=session, path_to_sql_commands=Path(__file__).parent / "materialize_mig_view.sql")

    number_of_inserted_rows = session.scalar(
        select(func.count(MigHierarchyMaterialized.id))  # type: ignore[arg-type] # pylint:disable=not-callable
    )
    _logger.info(
        "Inserted %d rows into the materialized view %s",
        number_of_inserted_rows,
        MigHierarchyMaterialized.__tablename__,
    )


# PRAGMAs executed before the bulk insert of the raw MIG tables.
# They relax SQLite's durability settings for the duration of the import.
_before_bulk_insert_ops: list[TextClause] = [
    sqlalchemy.text("PRAGMA synchronous = OFF"),
    sqlalchemy.text("PRAGMA journal_mode = WAL"),
    sqlalchemy.text("PRAGMA cache_size = -64000"),
    sqlalchemy.text("PRAGMA temp_store = MEMORY"),
    sqlalchemy.text("PRAGMA locking_mode = EXCLUSIVE"),
]
# PRAGMAs executed after the bulk insert: checkpoint the WAL and switch back to the regular settings.
_after_bulk_insert_ops: list[TextClause] = [
    sqlalchemy.text("PRAGMA wal_checkpoint(FULL)"),
    sqlalchemy.text("PRAGMA journal_mode = DELETE"),
    sqlalchemy.text("PRAGMA locking_mode = NORMAL"),
    sqlalchemy.text("PRAGMA synchronous = FULL"),
]


def _apply_pragmas(engine: sqlalchemy.engine.Engine, pragmas: Iterable[TextClause]) -> None:
    """Execute the given PRAGMA statements on a connection of the engine and commit."""
    with engine.connect() as conn:
        for pragma in pragmas:
            conn.execute(pragma)
        conn.commit()


def _read_sql_mig(
    item: Path | tuple[Path, date, Optional[date]] | tuple[Path, Literal[None], Literal[None]],
) -> SqlMessageImplementationGuide:
    """
    Read one MIG XML file and convert it to its SQL model, enriched with the validity dates.

    Raises a ValueError if the item is neither a Path nor a tuple.
    """
    mig_path: Path
    gueltig_von: Optional[date]
    gueltig_bis: Optional[date]
    if isinstance(item, Path):
        mig_path, gueltig_von, gueltig_bis = item, None, None
    elif isinstance(item, tuple):
        mig_path, gueltig_von, gueltig_bis = item[0], item[1], item[2]
    else:
        raise ValueError(f"Invalid item type in mig_files: {type(item)}")
    mig: PydanticMessageImplementationGuide = MigReader(mig_path).read()
    sql_mig = SqlMessageImplementationGuide.from_model(mig)
    sql_mig.gueltig_von = gueltig_von
    sql_mig.gueltig_bis = gueltig_bis
    if sql_mig.gueltig_von is not None:
        # the format version can only be derived if we know since when the MIG is valid
        sql_mig.edifact_format_version = get_edifact_format_version(sql_mig.gueltig_von)
    return sql_mig


def create_db_and_populate_with_mig_view(
    mig_files: Iterable[Path | tuple[Path, date, Optional[date]] | tuple[Path, Literal[None], Literal[None]]],
    drop_raw_tables: bool = False,
) -> Path:
    """
    Creates a SQLite database as temporary file, populates it with the MIGs provided and materializes the MIG view.
    You may provide either paths to the MIG.xml files or tuples where each Path comes with a gueltig_von and gueltig_bis
    date.
    Optionally deletes the original tables to have a smaller db file.
    Returns the path to the temporary database file.
    The calling code should move the file to a permanent location if needed.

    The engine used to build the database is disposed before returning (also in case of errors),
    so no pooled connection of this function keeps the returned file open.
    Raises a ValueError if an entry of mig_files is neither a Path nor a tuple.
    """
    # delete=False: we only need a unique file name; the file itself must outlive this function
    with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as sqlite_file:
        sqlite_path = Path(sqlite_file.name)
    engine = create_engine(f"sqlite:///{sqlite_path}")
    try:
        SQLModel.metadata.drop_all(engine)
        SQLModel.metadata.create_all(engine)

        _apply_pragmas(engine, _before_bulk_insert_ops)
        with Session(bind=engine) as session:
            sql_migs: list[SqlMessageImplementationGuide] = [_read_sql_mig(item) for item in mig_files]
            session.add_all(sql_migs)
            session.commit()
        _apply_pragmas(engine, _after_bulk_insert_ops)

        with Session(bind=engine) as session:
            create_mig_view(session)
            if drop_raw_tables:
                for model_class in [
                    SqlMessageImplementationGuide,
                    MigCode,
                    MigDataElement,
                    MigDataElementGroup,
                    MigSegment,
                    MigSegmentGroup,
                    MigSegmentGroupLink,
                ]:
                    session.execute(sqlalchemy.text(f"DROP TABLE IF EXISTS {model_class.__tablename__};"))
                    _logger.debug("Dropped %s", model_class.__tablename__)
            session.commit()
    finally:
        # release all pooled connections; otherwise they stay open on the file we hand to the caller
        engine.dispose()

    return sqlite_path


class MigHierarchyMaterialized(SQLModel, table=True):
    """
    A materialized flattened MIG hierarchy containing segment groups, segments, data elements, codes,
    and enriched with metadata like format and versionsnummer.
    This table is not thought to be written to, but only read from.
    It is created once after all other tables have been filled by the create_mig_view function.
    """

    __tablename__ = "mig_hierarchy_materialized"

    # Position of the row inside the flattened hierarchy
    id: str = Field(primary_key=True)
    mig_pk: UUID = Field(index=True)
    current_id: UUID
    root_id: UUID
    parent_id: Optional[UUID] = None
    depth: int
    position: Optional[int] = Field(default=None)
    path: str  # names of the ancestors and the row itself, joined by ' > '
    id_path: str = Field(index=True)  # '>'-separated path that the diff view uses to match rows
    parent_path: str
    root_order: int
    type: str = Field(index=True)
    source_id: UUID
    sort_path: str = Field(index=True)  # '-'-separated, zero padded positions; used in ORDER BY

    # Metadata
    format: EdifactFormat = Field(index=True)
    versionsnummer: str = Field(index=True)
    gueltig_von: Optional[date] = Field(default=None, index=True)
    gueltig_bis: Optional[date] = Field(default=None, index=True)
    edifact_format_version: Optional[EdifactFormatVersion] = Field(default=None, index=True)
    is_on_uebertragungsdatei_level: Optional[bool] = Field(default=None)

    # Segment Group
    segmentgroup_id: Optional[str] = Field(default=None, index=True)
    segmentgroup_name: Optional[str] = Field(default=None, index=True)
    segmentgroup_status_std: Optional[str] = Field(default=None)
    segmentgroup_status_specification: Optional[str] = Field(default=None)
    segmentgroup_counter: Optional[str] = Field(default=None)
    segmentgroup_level: Optional[int] = Field(default=None)
    segmentgroup_max_rep_std: Optional[int] = Field(default=None)
    segmentgroup_max_rep_specification: Optional[int] = Field(default=None)
    segmentgroup_position: Optional[int] = Field(default=None, index=True)

    # Segment
    segment_id: Optional[str] = Field(default=None, index=True)
    segment_name: Optional[str] = Field(default=None, index=True)
    segment_status_std: Optional[str] = Field(default=None)
    segment_status_specification: Optional[str] = Field(default=None)
    segment_counter: Optional[str] = Field(default=None)
    segment_level: Optional[int] = Field(default=None)
    segment_number: Optional[str] = Field(default=None, index=True)
    segment_max_rep_std: Optional[int] = Field(default=None)
    segment_max_rep_specification: Optional[int] = Field(default=None)
    segment_example: Optional[str] = Field(default=None)
    segment_description: Optional[str] = Field(default=None)
    segment_position: Optional[int] = Field(default=None, index=True)

    # Data Element Group
    dataelementgroup_id: Optional[str] = Field(default=None, index=True)
    dataelementgroup_name: Optional[str] = Field(default=None, index=True)
    dataelementgroup_description: Optional[str] = Field(default=None)
    dataelementgroup_status_std: Optional[str] = Field(default=None)
    dataelementgroup_status_specification: Optional[str] = Field(default=None)
    dataelementgroup_position: Optional[int] = Field(default=None, index=True)

    # Data Element
    dataelement_id: Optional[str] = Field(default=None, index=True)
    dataelement_name: Optional[str] = Field(default=None, index=True)
    dataelement_description: Optional[str] = Field(default=None)
    dataelement_status_std: Optional[str] = Field(default=None, index=True)
    dataelement_status_specification: Optional[str] = Field(default=None, index=True)
    dataelement_format_std: Optional[str] = Field(default=None)
    dataelement_format_specification: Optional[str] = Field(default=None)
    dataelement_position: Optional[int] = Field(default=None, index=True)

    # Code
    code_id: Optional[UUID] = Field(default=None, index=True)
    code_name: Optional[str] = Field(default=None, index=True)
    code_description: Optional[str] = Field(default=None, index=True)
    code_value: Optional[str] = Field(default=None, index=True)
    code_position: Optional[int] = Field(default=None, index=True)

    # Computed columns: filled by the SQL script via trim(coalesce(...)) over the level-specific columns
    line_name: Optional[str] = Field(default=None, index=True)
    line_status_std: Optional[str] = Field(default=None, index=True)
    line_status_specification: Optional[str] = Field(default=None, index=True)
"create_db_and_populate_with_mig_view", "create_mig_view"] diff --git a/unittests/__snapshots__/test_mig_views.ambr b/unittests/__snapshots__/test_mig_views.ambr new file mode 100644 index 0000000..248232c --- /dev/null +++ b/unittests/__snapshots__/test_mig_views.ambr @@ -0,0 +1,964 @@ +# serializer version: 1 +# name: test_mig_diff_snapshot_comdis + list([ + dict({ + 'diff_status': 'added', + 'id_path': 'UNH>C_S009>D_0057>1.0e>', + 'line_type': 'code', + 'new_code_value': '1.0e', + 'new_dataelement_id': 'D_0057', + 'new_format': 'COMDIS', + 'new_format_version': 'FV2504', + 'new_line_name': 'Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'UNH', + 'old_format': 'COMDIS', + 'old_format_version': 'FV2410', + 'path': 'Nachrichten-Kopfsegment > Nachrichten-Kennung > Anwendungscode der zuständigen Organisation > Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'sort_path': '00000-00001-00004-00000-', + }), + dict({ + 'diff_status': 'deleted', + 'id_path': 'UNH>C_S009>D_0057>1.0d>', + 'line_type': 'code', + 'new_format': 'COMDIS', + 'new_format_version': 'FV2504', + 'old_code_value': '1.0d', + 'old_dataelement_id': 'D_0057', + 'old_format': 'COMDIS', + 'old_format_version': 'FV2410', + 'old_line_name': 'Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'old_line_status_specification': 'R', + 'old_line_status_std': 'C', + 'old_segment_id': 'UNH', + 'path': 'Nachrichten-Kopfsegment > Nachrichten-Kennung > Anwendungscode der zuständigen Organisation > Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'sort_path': '00000-00001-00004-00000-', + }), + dict({ + 'changed_columns': 'line_name', + 'diff_status': 'modified', + 'id_path': 'SG2>SG3>FTX+ACD>C_C107>D_4441>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4441', + 'new_format': 'COMDIS', + 'new_format_version': 'FV2504', + 'new_line_name': 
'Freier Text, Code', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG3', + 'old_dataelement_id': 'D_4441', + 'old_format': 'COMDIS', + 'old_format_version': 'FV2410', + 'old_line_name': 'Nachrichtentyp', + 'old_line_status_specification': 'M', + 'old_line_status_std': 'M', + 'old_segment_id': 'FTX', + 'old_segmentgroup_id': 'SG3', + 'path': 'Dokument-/Nachricht-Einzelheiten > Begründung der Korrektheit > Begründung Richtigkeit mit Angabe einer Nachrichtenreferenz > Text-Referenz > Freier Text, Code', + 'sort_path': '00007-00002-00001-00002-00000-', + }), + dict({ + 'changed_columns': 'line_name', + 'diff_status': 'modified', + 'id_path': 'SG2>SG3>FTX+ACD>C_C108>D_4440>#3', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4440', + 'new_format': 'COMDIS', + 'new_format_version': 'FV2504', + 'new_line_name': 'Datenaustauschreferenz der Empfangsbestätigung (CONTRL) oder der Anerkennungsmeldung (APERAK)', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG3', + 'old_dataelement_id': 'D_4440', + 'old_format': 'COMDIS', + 'old_format_version': 'FV2410', + 'old_line_name': 'Datenaustauschreferenz der Empfangsbestätigung', + 'old_line_status_specification': 'R', + 'old_line_status_std': 'C', + 'old_segment_id': 'FTX', + 'old_segmentgroup_id': 'SG3', + 'path': 'Dokument-/Nachricht-Einzelheiten > Begründung der Korrektheit > Begründung Richtigkeit mit Angabe einer Nachrichtenreferenz > Text > Datenaustauschreferenz der Empfangsbestätigung (CONTRL) oder der Anerkennungsmeldung (APERAK)', + 'sort_path': '00007-00002-00001-00003-00002-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG2>SG3>FTX+ACD>C_C108>D_4440>#4', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4440', + 'new_format': 'COMDIS', + 'new_format_version': 'FV2504', + 'new_line_name': 'Nachrichtennummer aus der Anerkennungsmeldung (APERAK)', 
+ 'new_line_status_specification': 'D', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG3', + 'old_format': 'COMDIS', + 'old_format_version': 'FV2410', + 'path': 'Dokument-/Nachricht-Einzelheiten > Begründung der Korrektheit > Begründung Richtigkeit mit Angabe einer Nachrichtenreferenz > Text > Nachrichtennummer aus der Anerkennungsmeldung (APERAK)', + 'sort_path': '00007-00002-00001-00003-00003-', + }), + ]) +# --- +# name: test_mig_diff_snapshot_iftsta + list([ + dict({ + 'diff_status': 'added', + 'id_path': 'UNH>C_S009>D_0057>2.0g>', + 'line_type': 'code', + 'new_code_value': '2.0g', + 'new_dataelement_id': 'D_0057', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'UNH', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'Nachrichten-Kopfsegment > Nachrichten-Kennung > Anwendungscode der zuständigen Organisation > Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'sort_path': '00000-00001-00004-00000-', + }), + dict({ + 'diff_status': 'deleted', + 'id_path': 'UNH>C_S009>D_0057>2.0f>', + 'line_type': 'code', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'old_code_value': '2.0f', + 'old_dataelement_id': 'D_0057', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 'Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'old_line_status_specification': 'R', + 'old_line_status_std': 'C', + 'old_segment_id': 'UNH', + 'path': 'Nachrichten-Kopfsegment > Nachrichten-Kennung > Anwendungscode der zuständigen Organisation > Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'sort_path': '00000-00001-00004-00000-', + }), + dict({ + 'changed_columns': 'line_name', + 'diff_status': 'modified', + 'id_path': 
'SG14>SG15+Z15>STS>C_C556>D_9013>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_9013', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Code des Prüfschritts', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'STS', + 'new_segmentgroup_id': 'SG15', + 'old_dataelement_id': 'D_9013', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 'Statusanlass, Code', + 'old_line_status_specification': 'M', + 'old_line_status_std': 'M', + 'old_segment_id': 'STS', + 'old_segmentgroup_id': 'SG15', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Status des Umbaus der Messlokation > Statusanlaß > Code des Prüfschritts', + 'sort_path': '00006-00003-00000-00002-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>STS>C_C556>D_1131>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_1131', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Codeliste, Code', + 'new_line_status_specification': 'D', + 'new_line_status_std': 'C', + 'new_segment_id': 'STS', + 'new_segmentgroup_id': 'SG15', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Status des Umbaus der Messlokation > Statusanlaß > Codeliste, Code', + 'sort_path': '00006-00003-00000-00002-00001-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>STS>C_C556>D_1131>E_0286>', + 'line_type': 'code', + 'new_code_value': 'E_0286', + 'new_dataelement_id': 'D_1131', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'EBD Nr. 
E_0286', + 'new_line_status_specification': 'D', + 'new_line_status_std': 'C', + 'new_segment_id': 'STS', + 'new_segmentgroup_id': 'SG15', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Status des Umbaus der Messlokation > Statusanlaß > Codeliste, Code > EBD Nr. E_0286', + 'sort_path': '00006-00003-00000-00002-00001-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>', + 'line_type': 'segment_group', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Sendungspositionseinzelheiten', + 'new_line_status_specification': 'D', + 'new_line_status_std': 'C', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten', + 'sort_path': '00006-00003-00003-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>GID>', + 'line_type': 'segment', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Sendungspositionseinzelheiten', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'GID', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Sendungspositionseinzelheiten', + 'sort_path': '00006-00003-00003-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>GID>D_1496>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_1496', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Sendungsposition', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'GID', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus 
der Messlokation > Sendungspositionseinzelheiten > Sendungspositionseinzelheiten > Sendungsposition', + 'sort_path': '00006-00003-00003-00000-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>', + 'line_type': 'segment', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Freier Text', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text', + 'sort_path': '00006-00003-00003-00001-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>D_4451>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4451', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Textbezug, Qualifier', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Textbezug, Qualifier', + 'sort_path': '00006-00003-00003-00001-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>D_4451>ACB>', + 'line_type': 'code', + 'new_code_value': 'ACB', + 'new_dataelement_id': 'D_4451', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Zusätzliche Informationen (für allgemeine Hinweise)', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Textbezug, Qualifier > Zusätzliche Informationen (für 
allgemeine Hinweise)', + 'sort_path': '00006-00003-00003-00001-00000-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>D_4453>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4453', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Textfunktion, Code', + 'new_line_status_specification': 'N', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Textfunktion, Code', + 'sort_path': '00006-00003-00003-00001-00001-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>C_C107>', + 'line_type': 'dataelementgroup', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Text-Referenz', + 'new_line_status_specification': 'N', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Text-Referenz', + 'sort_path': '00006-00003-00003-00001-00002-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>C_C107>D_4441>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4441', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Freier Text, Code', + 'new_line_status_specification': 'N', + 'new_line_status_std': 'M', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Text-Referenz > Freier Text, Code', + 'sort_path': '00006-00003-00003-00001-00002-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 
'SG14>SG15+Z15>SG25>FTX>C_C108>', + 'line_type': 'dataelementgroup', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Text', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Text', + 'sort_path': '00006-00003-00003-00001-00003-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>C_C108>D_4440>#1', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4440', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Freier Text', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Text > Freier Text #1', + 'sort_path': '00006-00003-00003-00001-00003-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>C_C108>D_4440>#2', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4440', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Freier Text', + 'new_line_status_specification': 'C', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Text > Freier Text #2', + 'sort_path': '00006-00003-00003-00001-00003-00001-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>C_C108>D_4440>#3', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4440', + 'new_format': 'IFTSTA', + 'new_format_version': 
'FV2510', + 'new_line_name': 'Freier Text', + 'new_line_status_specification': 'C', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Text > Freier Text #3', + 'sort_path': '00006-00003-00003-00001-00003-00002-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>C_C108>D_4440>#4', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4440', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Freier Text', + 'new_line_status_specification': 'C', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Text > Freier Text #4', + 'sort_path': '00006-00003-00003-00001-00003-00003-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z15>SG25>FTX>C_C108>D_4440>#5', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_4440', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Freier Text', + 'new_line_status_specification': 'C', + 'new_line_status_std': 'C', + 'new_segment_id': 'FTX', + 'new_segmentgroup_id': 'SG25', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status des Umbaus der Messlokation > Sendungspositionseinzelheiten > Freier Text > Text > Freier Text #5', + 'sort_path': '00006-00003-00003-00001-00003-00004-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z20>STS>C_C556>D_1131>E_0278>', + 'line_type': 'code', + 'new_code_value': 'E_0278', + 'new_dataelement_id': 'D_1131', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'EBD Nr. 
E_0278', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'STS', + 'new_segmentgroup_id': 'SG15', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status zum Angebot oder Anfrage > Status zum Angebot oder Anfrage > Statusanlaß > Codeliste, Code > EBD Nr. E_0278', + 'sort_path': '00006-00008-00000-00002-00001-00006-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG14>SG15+Z20>STS>C_C556>D_1131>E_0281>', + 'line_type': 'code', + 'new_code_value': 'E_0281', + 'new_dataelement_id': 'D_1131', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'EBD Nr. E_0281', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'STS', + 'new_segmentgroup_id': 'SG15', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'path': 'CNI-LOC-CNT-SG15 > Status zum Angebot oder Anfrage > Status zum Angebot oder Anfrage > Statusanlaß > Codeliste, Code > EBD Nr. 
E_0281', + 'sort_path': '00006-00008-00000-00002-00001-00007-', + }), + dict({ + 'changed_columns': 'line_name', + 'diff_status': 'modified', + 'id_path': 'SG14>SG15+Z27>RFF+ACW>C_C506>D_1154>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_1154', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Referenznummer', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG15', + 'old_dataelement_id': 'D_1154', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 'Referenz, Identifikation', + 'old_line_status_specification': 'R', + 'old_line_status_std': 'C', + 'old_segment_id': 'RFF', + 'old_segmentgroup_id': 'SG15', + 'path': 'CNI-LOC-CNT-SG15 > Status der Ausfallarbeit > Referenzangaben > Referenz > Referenznummer', + 'sort_path': '00006-00011-00002-00000-00001-', + }), + dict({ + 'changed_columns': 'line_name', + 'diff_status': 'modified', + 'id_path': 'SG14>SG15+Z28>RFF+ACW>C_C506>D_1154>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_1154', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Referenznummer', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG15', + 'old_dataelement_id': 'D_1154', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 'Referenz, Identifikation', + 'old_line_status_specification': 'R', + 'old_line_status_std': 'C', + 'old_segment_id': 'RFF', + 'old_segmentgroup_id': 'SG15', + 'path': 'CNI-LOC-CNT-SG15 > Status des Fahrplananteils > Referenzangaben > Referenz > Referenznummer', + 'sort_path': '00006-00012-00002-00000-00001-', + }), + dict({ + 'changed_columns': 'line_name', + 'diff_status': 'modified', + 'id_path': 'SG14>SG15+Z29>RFF+ACW>C_C506>D_1154>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_1154', + 'new_format': 'IFTSTA', + 
'new_format_version': 'FV2510', + 'new_line_name': 'Referenznummer', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG15', + 'old_dataelement_id': 'D_1154', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 'Referenz, Identifikation', + 'old_line_status_specification': 'R', + 'old_line_status_std': 'C', + 'old_segment_id': 'RFF', + 'old_segmentgroup_id': 'SG15', + 'path': 'CNI-LOC-CNT-SG15 > Status des Gegenvorschlags der Ausfallarbeit > Referenzangaben > Referenz > Referenznummer', + 'sort_path': '00006-00013-00002-00000-00001-', + }), + dict({ + 'changed_columns': 'line_name', + 'diff_status': 'modified', + 'id_path': 'SG14>SG15+Z30>RFF+ACW>C_C506>D_1154>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_1154', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Referenznummer', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG15', + 'old_dataelement_id': 'D_1154', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 'Referenz, Identifikation', + 'old_line_status_specification': 'R', + 'old_line_status_std': 'C', + 'old_segment_id': 'RFF', + 'old_segmentgroup_id': 'SG15', + 'path': 'CNI-LOC-CNT-SG15 > Status des Gegenvorschlags des Fahrplananteils > Referenzangaben > Referenz > Referenznummer', + 'sort_path': '00006-00014-00002-00000-00001-', + }), + dict({ + 'changed_columns': 'line_status_specification', + 'diff_status': 'modified', + 'id_path': 'SG14>SG15+Z43>STS>C_C601>', + 'line_type': 'dataelementgroup', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Statuskategorie', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'STS', + 'new_segmentgroup_id': 'SG15', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 
'Statuskategorie', + 'old_line_status_specification': 'C', + 'old_line_status_std': 'C', + 'old_segment_id': 'STS', + 'old_segmentgroup_id': 'SG15', + 'path': 'CNI-LOC-CNT-SG15 > Bearbeitungsstatus > Bearbeitungsstatus > Statuskategorie', + 'sort_path': '00006-00019-00000-00000-', + }), + dict({ + 'changed_columns': 'line_status_specification', + 'diff_status': 'modified', + 'id_path': 'SG14>SG15+Z43>STS>C_C555>', + 'line_type': 'dataelementgroup', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Status', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'STS', + 'new_segmentgroup_id': 'SG15', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 'Status', + 'old_line_status_specification': 'C', + 'old_line_status_std': 'C', + 'old_segment_id': 'STS', + 'old_segmentgroup_id': 'SG15', + 'path': 'CNI-LOC-CNT-SG15 > Bearbeitungsstatus > Bearbeitungsstatus > Status', + 'sort_path': '00006-00019-00000-00001-', + }), + dict({ + 'changed_columns': 'line_status_specification', + 'diff_status': 'modified', + 'id_path': 'SG14>SG15+Z43>STS>C_C556>', + 'line_type': 'dataelementgroup', + 'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Statusanlaß', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'STS', + 'new_segmentgroup_id': 'SG15', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 'Statusanlaß', + 'old_line_status_specification': 'C', + 'old_line_status_std': 'C', + 'old_segment_id': 'STS', + 'old_segmentgroup_id': 'SG15', + 'path': 'CNI-LOC-CNT-SG15 > Bearbeitungsstatus > Bearbeitungsstatus > Statusanlaß', + 'sort_path': '00006-00019-00000-00002-', + }), + dict({ + 'changed_columns': 'line_status_specification, line_name', + 'diff_status': 'modified', + 'id_path': 'SG14>SG15+Z43>RFF+ACW>C_C506>D_1154>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_1154', + 
'new_format': 'IFTSTA', + 'new_format_version': 'FV2510', + 'new_line_name': 'Referenznummer', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG15', + 'old_dataelement_id': 'D_1154', + 'old_format': 'IFTSTA', + 'old_format_version': 'FV2504', + 'old_line_name': 'Referenz, Identifikation', + 'old_line_status_specification': 'C', + 'old_line_status_std': 'C', + 'old_segment_id': 'RFF', + 'old_segmentgroup_id': 'SG15', + 'path': 'CNI-LOC-CNT-SG15 > Bearbeitungsstatus > Referenzangaben > Referenz > Referenznummer', + 'sort_path': '00006-00019-00002-00000-00001-', + }), + ]) +# --- +# name: test_mig_diff_snapshot_pricat + list([ + dict({ + 'diff_status': 'added', + 'id_path': 'UNH>C_S009>D_0057>2.0d>', + 'line_type': 'code', + 'new_code_value': '2.0d', + 'new_dataelement_id': 'D_0057', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'UNH', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'Nachrichten-Kopfsegment > Nachrichten-Kennung > Anwendungscode der zuständigen Organisation > Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'sort_path': '00000-00001-00004-00000-', + }), + dict({ + 'diff_status': 'deleted', + 'id_path': 'UNH>C_S009>D_0057>2.0c>', + 'line_type': 'code', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'old_code_value': '2.0c', + 'old_dataelement_id': 'D_0057', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'old_line_name': 'Versionsnummer der zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'old_line_status_specification': 'R', + 'old_line_status_std': 'C', + 'old_segment_id': 'UNH', + 'path': 'Nachrichten-Kopfsegment > Nachrichten-Kennung > Anwendungscode der zuständigen Organisation > Versionsnummer der 
zugrundeliegenden BDEW-Nachrichtenbeschreibung', + 'sort_path': '00000-00001-00004-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG1+Z56>', + 'line_type': 'segment_group', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Preise des Netzbetreibers', + 'new_line_status_specification': 'D', + 'new_line_status_std': 'C', + 'new_segmentgroup_id': 'SG1', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'Preise des Netzbetreibers', + 'sort_path': '00006-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG1+Z56>RFF>', + 'line_type': 'segment', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Preise des Netzbetreibers', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG1', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'Preise des Netzbetreibers > Preise des Netzbetreibers', + 'sort_path': '00006-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG1+Z56>RFF>C_C506>', + 'line_type': 'dataelementgroup', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Referenz', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG1', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'Preise des Netzbetreibers > Preise des Netzbetreibers > Referenz', + 'sort_path': '00006-00000-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG1+Z56>RFF>C_C506>D_1153>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_1153', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Referenz, Qualifier', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG1', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'Preise des Netzbetreibers > 
Preise des Netzbetreibers > Referenz > Referenz, Qualifier', + 'sort_path': '00006-00000-00000-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG1+Z56>RFF>C_C506>D_1153>Z56>', + 'line_type': 'code', + 'new_code_value': 'Z56', + 'new_dataelement_id': 'D_1153', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Preise des Netzbetreibers', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG1', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'Preise des Netzbetreibers > Preise des Netzbetreibers > Referenz > Referenz, Qualifier > Preise des Netzbetreibers', + 'sort_path': '00006-00000-00000-00000-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG1+Z56>RFF>C_C506>D_1154>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_1154', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'MP-ID', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'RFF', + 'new_segmentgroup_id': 'SG1', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'Preise des Netzbetreibers > Preise des Netzbetreibers > Referenz > MP-ID', + 'sort_path': '00006-00000-00000-00001-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG17+9>SG36>SG40>RNG>', + 'line_type': 'segment', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Angaben zum Wertebereich', + 'new_line_status_specification': 'D', + 'new_line_status_std': 'C', + 'new_segment_id': 'RNG', + 'new_segmentgroup_id': 'SG40', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'PGI-SG36 > LIN-PIA-IMD-MEA-QTY-HAN-ALI-DTM-RFF-LOC-DOC-PTY-FTX-SG37-SG38-SG39-SG40-SG41-SG47-SG48-SG49-SG50-SG51-SG52-SG54-SG55-SG56 > Preisangabe > Angaben zum Wertebereich', + 'sort_path': '00011-00001-00003-00001-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 
'SG17+9>SG36>SG40>RNG>D_6167>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_6167', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Wertebereich, Qualifier', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'RNG', + 'new_segmentgroup_id': 'SG40', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'PGI-SG36 > LIN-PIA-IMD-MEA-QTY-HAN-ALI-DTM-RFF-LOC-DOC-PTY-FTX-SG37-SG38-SG39-SG40-SG41-SG47-SG48-SG49-SG50-SG51-SG52-SG54-SG55-SG56 > Preisangabe > Angaben zum Wertebereich > Wertebereich, Qualifier', + 'sort_path': '00011-00001-00003-00001-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG17+9>SG36>SG40>RNG>D_6167>10>', + 'line_type': 'code', + 'new_code_value': '10', + 'new_dataelement_id': 'D_6167', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'jährlicher Mengenbereich', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'RNG', + 'new_segmentgroup_id': 'SG40', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'PGI-SG36 > LIN-PIA-IMD-MEA-QTY-HAN-ALI-DTM-RFF-LOC-DOC-PTY-FTX-SG37-SG38-SG39-SG40-SG41-SG47-SG48-SG49-SG50-SG51-SG52-SG54-SG55-SG56 > Preisangabe > Angaben zum Wertebereich > Wertebereich, Qualifier > jährlicher Mengenbereich', + 'sort_path': '00011-00001-00003-00001-00000-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG17+9>SG36>SG40>RNG>C_C280>', + 'line_type': 'dataelementgroup', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Wertebereich', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'RNG', + 'new_segmentgroup_id': 'SG40', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'PGI-SG36 > LIN-PIA-IMD-MEA-QTY-HAN-ALI-DTM-RFF-LOC-DOC-PTY-FTX-SG37-SG38-SG39-SG40-SG41-SG47-SG48-SG49-SG50-SG51-SG52-SG54-SG55-SG56 > Preisangabe > Angaben zum 
Wertebereich > Wertebereich', + 'sort_path': '00011-00001-00003-00001-00001-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG17+9>SG36>SG40>RNG>C_C280>D_6411>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_6411', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Maßeinheit, Code', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'RNG', + 'new_segmentgroup_id': 'SG40', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'PGI-SG36 > LIN-PIA-IMD-MEA-QTY-HAN-ALI-DTM-RFF-LOC-DOC-PTY-FTX-SG37-SG38-SG39-SG40-SG41-SG47-SG48-SG49-SG50-SG51-SG52-SG54-SG55-SG56 > Preisangabe > Angaben zum Wertebereich > Wertebereich > Maßeinheit, Code', + 'sort_path': '00011-00001-00003-00001-00001-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG17+9>SG36>SG40>RNG>C_C280>D_6411>H87>', + 'line_type': 'code', + 'new_code_value': 'H87', + 'new_dataelement_id': 'D_6411', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Stück', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'RNG', + 'new_segmentgroup_id': 'SG40', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'PGI-SG36 > LIN-PIA-IMD-MEA-QTY-HAN-ALI-DTM-RFF-LOC-DOC-PTY-FTX-SG37-SG38-SG39-SG40-SG41-SG47-SG48-SG49-SG50-SG51-SG52-SG54-SG55-SG56 > Preisangabe > Angaben zum Wertebereich > Wertebereich > Maßeinheit, Code > Stück', + 'sort_path': '00011-00001-00003-00001-00001-00000-00000-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG17+9>SG36>SG40>RNG>C_C280>D_6411>DAY>', + 'line_type': 'code', + 'new_code_value': 'DAY', + 'new_dataelement_id': 'D_6411', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Tag', + 'new_line_status_specification': 'M', + 'new_line_status_std': 'M', + 'new_segment_id': 'RNG', + 'new_segmentgroup_id': 'SG40', + 'old_format': 'PRICAT', + 'old_format_version': 
'FV2410', + 'path': 'PGI-SG36 > LIN-PIA-IMD-MEA-QTY-HAN-ALI-DTM-RFF-LOC-DOC-PTY-FTX-SG37-SG38-SG39-SG40-SG41-SG47-SG48-SG49-SG50-SG51-SG52-SG54-SG55-SG56 > Preisangabe > Angaben zum Wertebereich > Wertebereich > Maßeinheit, Code > Tag', + 'sort_path': '00011-00001-00003-00001-00001-00000-00001-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG17+9>SG36>SG40>RNG>C_C280>D_6162>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_6162', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Wertebereichsgrenze, untere', + 'new_line_status_specification': 'R', + 'new_line_status_std': 'C', + 'new_segment_id': 'RNG', + 'new_segmentgroup_id': 'SG40', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'PGI-SG36 > LIN-PIA-IMD-MEA-QTY-HAN-ALI-DTM-RFF-LOC-DOC-PTY-FTX-SG37-SG38-SG39-SG40-SG41-SG47-SG48-SG49-SG50-SG51-SG52-SG54-SG55-SG56 > Preisangabe > Angaben zum Wertebereich > Wertebereich > Wertebereichsgrenze, untere', + 'sort_path': '00011-00001-00003-00001-00001-00001-', + }), + dict({ + 'diff_status': 'added', + 'id_path': 'SG17+9>SG36>SG40>RNG>C_C280>D_6152>', + 'line_type': 'dataelement', + 'new_dataelement_id': 'D_6152', + 'new_format': 'PRICAT', + 'new_format_version': 'FV2504', + 'new_line_name': 'Wertebereichsgrenze, obere', + 'new_line_status_specification': 'D', + 'new_line_status_std': 'C', + 'new_segment_id': 'RNG', + 'new_segmentgroup_id': 'SG40', + 'old_format': 'PRICAT', + 'old_format_version': 'FV2410', + 'path': 'PGI-SG36 > LIN-PIA-IMD-MEA-QTY-HAN-ALI-DTM-RFF-LOC-DOC-PTY-FTX-SG37-SG38-SG39-SG40-SG41-SG47-SG48-SG49-SG50-SG51-SG52-SG54-SG55-SG56 > Preisangabe > Angaben zum Wertebereich > Wertebereich > Wertebereichsgrenze, obere', + 'sort_path': '00011-00001-00003-00001-00001-00002-', + }), + ]) +# --- diff --git a/unittests/test_mig_views.py b/unittests/test_mig_views.py new file mode 100644 index 0000000..246982d --- /dev/null +++ b/unittests/test_mig_views.py @@ -0,0 +1,330 @@ +""" +Tests for 
MIG hierarchy and diff views +""" + +from datetime import date +from pathlib import Path +from typing import Generator + +import pytest +import sqlalchemy.exc +from efoli import EdifactFormatVersion +from sqlmodel import Session, SQLModel, create_engine, select +from syrupy.assertion import SnapshotAssertion + +from fundamend import MigReader +from fundamend.sqlmodels import MessageImplementationGuide as SqlMessageImplementationGuide +from fundamend.sqlmodels import ( + MigDiffLine, + MigHierarchyMaterialized, + create_db_and_populate_with_mig_view, + create_mig_diff_view, + create_mig_view, +) + +from .conftest import is_private_submodule_checked_out, private_submodule_root + + +@pytest.fixture() +def sqlite_session(tmp_path: Path) -> Generator[Session, None, None]: + database_path = tmp_path / "test_mig_view.db" + engine = create_engine(f"sqlite:///{database_path}") + SQLModel.metadata.drop_all(engine) + SQLModel.metadata.create_all(engine) + with Session(bind=engine) as session: + yield session + session.commit() + session.flush() + print(f"Wrote all data to {database_path.absolute()}") + + +def test_mig_hierarchy_view_single_mig(sqlite_session: Session) -> None: + """Test that MIG hierarchy view works for a single MIG file""" + mig = MigReader( + Path(__file__).parent / "example_files" / "UTILTS_MIG_1.1d_Konsultationsfassung_2024_04_02.xml" + ).read() + sql_mig = SqlMessageImplementationGuide.from_model(mig) + sqlite_session.add(sql_mig) + sqlite_session.commit() + + create_mig_view(sqlite_session) + + # Query the materialized view + statement = ( + select(MigHierarchyMaterialized) + .where(MigHierarchyMaterialized.format == "UTILTS") + .order_by(MigHierarchyMaterialized.sort_path) + ) + results = sqlite_session.exec(statement).all() + + assert len(results) > 0, "Hierarchy should have entries" + + # Check first row + first_row = results[0] + assert first_row.format == "UTILTS" + assert first_row.type in ("segment", "segment_group") + + # Verify computed columns 
exist + assert first_row.line_name is not None + assert first_row.line_status_std is not None + + +def test_create_db_and_populate_with_mig_view() -> None: + """Test the convenience function to create and populate MIG database""" + mig_paths = [Path(__file__).parent / "example_files" / "UTILTS_MIG_1.1d_Konsultationsfassung_2024_04_02.xml"] + actual_sqlite_path = create_db_and_populate_with_mig_view(mig_files=mig_paths) + assert actual_sqlite_path.exists() + + engine = create_engine(f"sqlite:///{actual_sqlite_path}") + with Session(bind=engine) as session: + stmt = ( + select(MigHierarchyMaterialized) + .where(MigHierarchyMaterialized.format == "UTILTS") + .order_by(MigHierarchyMaterialized.sort_path) + ) + results = session.exec(stmt).all() + + assert len(results) > 0 + + +@pytest.mark.parametrize("drop_raw_tables", [True, False]) +def test_create_db_and_populate_with_mig_view_drop_tables(drop_raw_tables: bool) -> None: + """Test that drop_raw_tables option works""" + mig_paths = [Path(__file__).parent / "example_files" / "UTILTS_MIG_1.1d_Konsultationsfassung_2024_04_02.xml"] + actual_sqlite_path = create_db_and_populate_with_mig_view(mig_files=mig_paths, drop_raw_tables=drop_raw_tables) + assert actual_sqlite_path.exists() + + engine = create_engine(f"sqlite:///{actual_sqlite_path}") + with Session(bind=engine) as session: + # Check if raw tables exist + try: + session.execute(select(SqlMessageImplementationGuide)) + raw_tables_exist = True + except sqlalchemy.exc.OperationalError: + raw_tables_exist = False + + if drop_raw_tables: + assert not raw_tables_exist, "Raw tables should be dropped" + else: + assert raw_tables_exist, "Raw tables should exist" + + +def test_mig_view_with_validity_dates() -> None: + """Test that validity dates are properly stored""" + mig_paths = [ + ( + Path(__file__).parent / "example_files" / "UTILTS_MIG_1.1d_Konsultationsfassung_2024_04_02.xml", + date(2024, 4, 2), + date(2025, 6, 6), + ) + ] + actual_sqlite_path = 
create_db_and_populate_with_mig_view(mig_files=mig_paths) + + engine = create_engine(f"sqlite:///{actual_sqlite_path}") + with Session(bind=engine) as session: + stmt = select(MigHierarchyMaterialized).where(MigHierarchyMaterialized.format == "UTILTS").limit(1) + result = session.exec(stmt).first() + + assert result is not None + assert result.gueltig_von == date(2024, 4, 2) + assert result.gueltig_bis == date(2025, 6, 6) + + +def test_mig_diff_view_with_two_versions() -> None: + """Test MIG diff view with two different versions""" + if not is_private_submodule_checked_out(): + pytest.skip("Skipping test because of missing private submodule") + + # Find UTILTS MIG files from different format versions + fv2410_migs = list((private_submodule_root / "FV2410").rglob("**/UTILTS_MIG*.xml")) + fv2504_migs = list((private_submodule_root / "FV2504").rglob("**/UTILTS_MIG*.xml")) + + if not fv2410_migs or not fv2504_migs: + pytest.skip("No UTILTS MIG files found in both FV2410 and FV2504") + + mig_paths = [ + (fv2410_migs[0], date(2024, 10, 1), date(2025, 6, 6)), + (fv2504_migs[0], date(2025, 6, 6), None), + ] + + actual_sqlite_path = create_db_and_populate_with_mig_view(mig_files=mig_paths, drop_raw_tables=False) + engine = create_engine(f"sqlite:///{actual_sqlite_path}") + + with Session(bind=engine) as session: + create_mig_diff_view(session) + + # Query the diff view + stmt = ( + select(MigDiffLine) + .where(MigDiffLine.old_format == "UTILTS") + .where(MigDiffLine.new_format == "UTILTS") + .order_by(MigDiffLine.sort_path) + .limit(100) + ) + results = session.exec(stmt).all() + + # Should have some results + assert len(results) > 0 + + # Check that diff_status values are valid + valid_statuses = {"added", "deleted", "modified", "unchanged"} + for row in results: + assert row.diff_status in valid_statuses + + +def test_mig_hierarchy_all_example_migs(sqlite_session: Session) -> None: + """Test hierarchy view with all example MIG files""" + example_files_dir = 
Path(__file__).parent / "example_files" + mig_files = list(example_files_dir.glob("*MIG*.xml")) + + for mig_file in mig_files: + mig = MigReader(mig_file).read() + sql_mig = SqlMessageImplementationGuide.from_model(mig) + sqlite_session.add(sql_mig) + + sqlite_session.commit() + create_mig_view(sqlite_session) + + # Query and verify + statement = select(MigHierarchyMaterialized).order_by(MigHierarchyMaterialized.sort_path) + results = sqlite_session.exec(statement).all() + + assert len(results) > 0, "Should have hierarchy entries for all MIGs" + + +def test_mig_hierarchy_all_from_submodule() -> None: + """Test hierarchy view with all MIGs from private submodule""" + if not is_private_submodule_checked_out(): + pytest.skip("Skipping test because of missing private submodule") + + mig_paths = list(private_submodule_root.rglob("**/*MIG*.xml")) + + if not mig_paths: + pytest.skip("No MIG files found in submodule") + + # Use just the first few MIGs to keep test reasonable + mig_paths = mig_paths[:5] + + actual_sqlite_path = create_db_and_populate_with_mig_view(mig_files=mig_paths) + assert actual_sqlite_path.exists() + + engine = create_engine(f"sqlite:///{actual_sqlite_path}") + with Session(bind=engine) as session: + stmt = select(MigHierarchyMaterialized).limit(10) + results = session.exec(stmt).all() + + assert len(results) > 0 + + +@pytest.mark.snapshot +def test_mig_diff_snapshot_comdis(snapshot: SnapshotAssertion) -> None: + """Snapshot test for MIG diff view comparing COMDIS between FV2410 and FV2504""" + if not is_private_submodule_checked_out(): + pytest.skip("Skipping test because of missing private submodule") + + fv2410_comdis = private_submodule_root / "FV2410" / "COMDIS_MIG_1_0d__außerordentliche_20240726.xml" + fv2504_comdis = private_submodule_root / "FV2504" / "COMDIS_MIG_1_0e__20240619.xml" + + if not fv2410_comdis.exists() or not fv2504_comdis.exists(): + pytest.skip("COMDIS MIG files not found in both FV2410 and FV2504") + + mig_paths = [ + 
(fv2410_comdis, date(2024, 10, 1), date(2025, 6, 6)), + (fv2504_comdis, date(2025, 6, 6), None), + ] + + actual_sqlite_path = create_db_and_populate_with_mig_view(mig_files=mig_paths, drop_raw_tables=False) + engine = create_engine(f"sqlite:///{actual_sqlite_path}") + + with Session(bind=engine) as session: + create_mig_diff_view(session) + + stmt = ( + select(MigDiffLine) + .where(MigDiffLine.old_format_version == EdifactFormatVersion.FV2410) + .where(MigDiffLine.new_format_version == EdifactFormatVersion.FV2504) + .where(MigDiffLine.old_format == "COMDIS") + .where(MigDiffLine.new_format == "COMDIS") + .where(MigDiffLine.diff_status != "unchanged") + .order_by(MigDiffLine.sort_path) + ) + results = session.exec(stmt).all() + + raw_results = [r.model_dump(mode="json", exclude_none=True) for r in results] + snapshot.assert_match(raw_results) + + +@pytest.mark.snapshot +def test_mig_diff_snapshot_pricat(snapshot: SnapshotAssertion) -> None: + """Snapshot test for MIG diff view comparing PRICAT between FV2410 and FV2504 (larger diff)""" + if not is_private_submodule_checked_out(): + pytest.skip("Skipping test because of missing private submodule") + + fv2410_pricat = private_submodule_root / "FV2410" / "PRICAT_MIG_2_0c_Fehlerkorrektur_20240617.xml" + fv2504_pricat = private_submodule_root / "FV2504" / "PRICAT_MIG_2_0d_20240619.xml" + + if not fv2410_pricat.exists() or not fv2504_pricat.exists(): + pytest.skip("PRICAT MIG files not found in both FV2410 and FV2504") + + mig_paths = [ + (fv2410_pricat, date(2024, 10, 1), date(2025, 6, 6)), + (fv2504_pricat, date(2025, 6, 6), None), + ] + + actual_sqlite_path = create_db_and_populate_with_mig_view(mig_files=mig_paths, drop_raw_tables=False) + engine = create_engine(f"sqlite:///{actual_sqlite_path}") + + with Session(bind=engine) as session: + create_mig_diff_view(session) + + stmt = ( + select(MigDiffLine) + .where(MigDiffLine.old_format_version == EdifactFormatVersion.FV2410) + .where(MigDiffLine.new_format_version == 
EdifactFormatVersion.FV2504) + .where(MigDiffLine.old_format == "PRICAT") + .where(MigDiffLine.new_format == "PRICAT") + .where(MigDiffLine.diff_status != "unchanged") + .order_by(MigDiffLine.sort_path) + ) + results = session.exec(stmt).all() + + raw_results = [r.model_dump(mode="json", exclude_none=True) for r in results] + snapshot.assert_match(raw_results) + + +@pytest.mark.snapshot +def test_mig_diff_snapshot_iftsta(snapshot: SnapshotAssertion) -> None: + """Snapshot test for MIG diff view comparing IFTSTA between FV2504 and FV2510 (version 2.0f vs 2.0g)""" + if not is_private_submodule_checked_out(): + pytest.skip("Skipping test because of missing private submodule") + + fv2504_iftsta = private_submodule_root / "FV2504" / "IFTSTA_MIG_2_0f_Fehlerkorrektur_20250225.xml" + fv2510_iftsta = private_submodule_root / "FV2510" / "IFTSTA_MIG_2_0g_20250401.xml" + + if not fv2504_iftsta.exists() or not fv2510_iftsta.exists(): + pytest.skip("IFTSTA MIG files not found in both FV2504 and FV2510") + + mig_paths = [ + (fv2504_iftsta, date(2025, 6, 6), date(2025, 10, 1)), + (fv2510_iftsta, date(2025, 10, 1), None), + ] + + actual_sqlite_path = create_db_and_populate_with_mig_view(mig_files=mig_paths, drop_raw_tables=False) + engine = create_engine(f"sqlite:///{actual_sqlite_path}") + + with Session(bind=engine) as session: + create_mig_diff_view(session) + + stmt = ( + select(MigDiffLine) + .where(MigDiffLine.old_format_version == EdifactFormatVersion.FV2504) + .where(MigDiffLine.new_format_version == EdifactFormatVersion.FV2510) + .where(MigDiffLine.old_format == "IFTSTA") + .where(MigDiffLine.new_format == "IFTSTA") + .where(MigDiffLine.diff_status != "unchanged") + .order_by(MigDiffLine.sort_path) + ) + results = session.exec(stmt).all() + + raw_results = [r.model_dump(mode="json", exclude_none=True) for r in results] + snapshot.assert_match(raw_results)