|
1 | 1 | import json |
2 | 2 | import logging |
3 | | -from typing import Optional |
4 | 3 |
|
5 | 4 | from jsonschema import Draft7Validator, ValidationError |
6 | 5 |
|
| 6 | +from target_clickhouse.sinks import handle_validation_error |
| 7 | + |
7 | 8 | # Schema that allows a field to be either a string or null |
8 | 9 | schema = { |
9 | 10 | "type": "object", |
|
20 | 21 | "name": {"type": ["string", "null"]}, |
21 | 22 | "age": {"type": "number"}, |
22 | 23 | "address": { |
23 | | - "type": "object", |
24 | | - "properties": { |
25 | | - "street": {"type": "string"}, |
26 | | - "city": {"type": "string"}, |
27 | | - }, |
28 | | - "required": ["street", "city"], |
| 24 | + "type": ["string", "null"], |
29 | 25 | }, |
30 | 26 | }, |
31 | 27 | "required": ["name", "age", "address"], |
|
35 | 31 | # Validator instance |
36 | 32 | validator = Draft7Validator(schema) |
37 | 33 |
|
38 | | -# Function to handle validation errors |
39 | | -def handle_validation_error(record, |
40 | | - e: ValidationError, |
41 | | - logger: Optional[logging.Logger] = None): |
42 | | - if "'string'" in e.message: |
43 | | - if logger: |
44 | | - logger.warning( |
45 | | - f"Received non valid record for types 'string', {e.path}, " |
46 | | - f"attempting conversion for record, {record}", |
47 | | - ) |
48 | | - |
49 | | - |
50 | | - key_path = list(e.path) |
51 | | - |
52 | | - # Access the problematic value using the key_path |
53 | | - current_level = record |
54 | | - for key in key_path[:-1]: # Go to parent of the problematic key |
55 | | - current_level = current_level[key] |
56 | | - |
57 | | - problem_key = key_path[-1] |
58 | | - problem_value = current_level[problem_key] |
59 | | - |
60 | | - # Convert the problematic value to string only if it's not null. |
61 | | - if problem_value is not None: |
62 | | - current_level[problem_key] = str(problem_value) |
63 | | - if logger: |
64 | | - logger.warning("Validating converted record") |
65 | | - return record |
66 | | - return None |
67 | | - return None |
| 34 | +nested_validator = Draft7Validator(nested_schema) |
68 | 35 |
|
69 | 36 | # Set up the logger |
70 | 37 | logging.basicConfig(level=logging.INFO) |
@@ -130,33 +97,53 @@ def test_nested_dict_with_nested_non_string(): |
130 | 97 | ), "The 'city' should have been converted to a string." |
131 | 98 | validator.validate(updated_record) # This should not raise an error |
132 | 99 |
|
133 | | - def test_single_level_schema_nested_dict_to_string(): |
134 | | - record = {"name": {"first": "John", "last": "Doe"}, "age": 30} |
135 | | - try: |
136 | | - validator.validate(record) |
137 | | - except ValidationError as e: |
138 | | - updated_record = handle_validation_error(record, e, logger) |
139 | | - assert ( |
140 | | - isinstance(updated_record["name"], str) |
141 | | - ), "The 'name' should have been converted to a JSON string." |
142 | | - assert ( |
143 | | - json.loads(updated_record["name"]) == {"first": "John", "last": "Doe"} |
144 | | - ), "The JSON string is not correct." |
145 | | - |
146 | | - def test_single_level_schema_deeply_nested_dict_to_string(): |
147 | | - record = {"name": |
148 | | - {"first": "John", "last": "Doe", |
149 | | - "nicknames": {"short": "JD", "long": "Johnny"}, |
150 | | - }, |
151 | | - "age": 30, |
152 | | - } |
153 | | - try: |
154 | | - validator.validate(record) |
155 | | - except ValidationError as e: |
156 | | - updated_record = handle_validation_error(record, e, logger) |
157 | | - assert ( |
158 | | - isinstance(updated_record["name"], str) |
159 | | - ), "The 'name' field should have been converted to a JSON string." |
160 | | - assert ( |
161 | | - "nicknames" in json.loads(updated_record["name"]) |
162 | | - ), "The JSON string does not correctly represent the nested dict." |
def test_single_level_schema_nested_dict_to_string():
    """Check that a dict-valued ``name`` is rewritten as a JSON string.

    The record is validated with ``nested_validator``; the resulting
    ``ValidationError`` is passed to ``handle_validation_error`` and the
    returned record is inspected. The test fails explicitly when validation
    does not raise, so it can no longer pass without running any assertion.
    """
    # NOTE(review): the record has no "address" key. If nested_schema (defined
    # outside this view) lists "address" as required, the validator may report
    # that error instead of the type error on "name" -- confirm.
    record = {"name": {"first": "John", "last": "Doe"}, "age": 30}

    try:
        nested_validator.validate(record)
    except ValidationError as e:
        updated_record = handle_validation_error(record, e, logger)
    else:
        # A silent pass here would hide a schema that accepts the bad record.
        raise AssertionError(
            "Expected a ValidationError for a dict-valued 'name'.",
        )

    assert (
        isinstance(updated_record["name"], str)
    ), "The 'name' should have been converted to a JSON string."
    assert (
        json.loads(updated_record["name"]) == {"first": "John", "last": "Doe"}
    ), "The JSON string is not correct."
| 112 | + |
def test_single_level_schema_deeply_nested_dict_to_string():
    """Check that a nested dict under ``address`` is rewritten as a JSON string.

    The ``address`` value is a dict that itself contains a dict (``city``).
    The record is validated with ``nested_validator``; the resulting
    ``ValidationError`` is passed to ``handle_validation_error`` and the
    returned record is inspected. The test fails explicitly when validation
    does not raise, so it can no longer pass without running any assertion.
    """
    record = {
        "name": "John",
        "age": 30,
        "address": {"street": "Main", "city": {"name": "New York"}},
    }

    try:
        nested_validator.validate(record)
    except ValidationError as e:
        updated_record = handle_validation_error(record, e, logger)
    else:
        # A silent pass here would hide a schema that accepts the bad record.
        raise AssertionError(
            "Expected a ValidationError for a dict-valued 'address'.",
        )

    assert (
        isinstance(updated_record["address"], str)
    ), "The 'address' field should have been converted to a JSON string."
    assert (
        "street" in json.loads(updated_record["address"])
    ), "The JSON string does not correctly represent the nested dict."
| 129 | + |
def test_single_level_schema_deeply_nested_list_of_dicts_to_string():
    """Check that a list of nested dicts under ``address`` becomes a JSON string.

    The expected string is ``json.dumps`` of the original list, captured
    before the handler runs. The record is validated with
    ``nested_validator``; the resulting ``ValidationError`` is passed to
    ``handle_validation_error`` and the returned record is compared with that
    string. The test fails explicitly when validation does not raise, so it
    can no longer pass without running any assertion.
    """
    record = {
        "name": "John",
        "age": 30,
        "address": [
            {"street": "Main", "city": {"name": "New York"}},
            {"street": "Second", "city": {"name": "Los Angeles"}},
        ],
    }
    # Snapshot the expected serialization before the handler is called.
    # NOTE(review): presumably the handler mutates the record in place; confirm.
    address_str = json.dumps(record["address"])

    try:
        nested_validator.validate(record)
    except ValidationError as e:
        updated_record = handle_validation_error(record, e, logger)
    else:
        # A silent pass here would hide a schema that accepts the bad record.
        raise AssertionError(
            "Expected a ValidationError for a list-valued 'address'.",
        )

    assert (
        isinstance(updated_record["address"], str)
    ), "The 'address' field should have been converted to a JSON string."
    assert (
        updated_record["address"] == address_str
    ), "The JSON string does not correctly represent the nested list of dicts."
0 commit comments