Skip to content

Commit 128a446

Browse files
authored
Fix conversion of list to dict when overriding schema exception (#68)
* fix * fix
1 parent 340751e commit 128a446

File tree

3 files changed

+57
-70
lines changed

3 files changed

+57
-70
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "shaped-target-clickhouse"
3-
version = "0.1.3"
3+
version = "0.1.4"
44
description = "`target-clickhouse` is a Singer target for clickhouse, built with the Meltano Singer SDK."
55
readme = "README.md"
66
authors = ["Ben Theunissen"]

target_clickhouse/sinks.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def _validate_and_parse(self, record: dict) -> dict:
153153
self._validator.validate(record)
154154
except jsonschema_exceptions.ValidationError as e:
155155
record = handle_validation_error(record, e, self.logger)
156-
self._validator.validate(record)
156+
return self._validate_and_parse(record)
157157

158158
self._parse_timestamps_in_record(
159159
record=record,
@@ -186,7 +186,7 @@ def handle_validation_error(record,
186186

187187
# Convert the problematic value to string only if it's not null
188188
if problem_value is not None:
189-
if isinstance(problem_value, dict):
189+
if isinstance(problem_value, (dict, list)):
190190
# Convert the dict or list to a JSON string
191191
current_level[problem_key] = json.dumps(problem_value)
192192
else:

tests/test_validation.py

Lines changed: 54 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import json
22
import logging
3-
from typing import Optional
43

54
from jsonschema import Draft7Validator, ValidationError
65

6+
from target_clickhouse.sinks import handle_validation_error
7+
78
# Schema that allows a field to be either a string or null
89
schema = {
910
"type": "object",
@@ -20,12 +21,7 @@
2021
"name": {"type": ["string", "null"]},
2122
"age": {"type": "number"},
2223
"address": {
23-
"type": "object",
24-
"properties": {
25-
"street": {"type": "string"},
26-
"city": {"type": "string"},
27-
},
28-
"required": ["street", "city"],
24+
"type": ["string", "null"],
2925
},
3026
},
3127
"required": ["name", "age", "address"],
@@ -35,36 +31,7 @@
3531
# Validator instance
3632
validator = Draft7Validator(schema)
3733

38-
# Function to handle validation errors
39-
def handle_validation_error(record,
40-
e: ValidationError,
41-
logger: Optional[logging.Logger] = None):
42-
if "'string'" in e.message:
43-
if logger:
44-
logger.warning(
45-
f"Received non valid record for types 'string', {e.path}, "
46-
f"attempting conversion for record, {record}",
47-
)
48-
49-
50-
key_path = list(e.path)
51-
52-
# Access the problematic value using the key_path
53-
current_level = record
54-
for key in key_path[:-1]: # Go to parent of the problematic key
55-
current_level = current_level[key]
56-
57-
problem_key = key_path[-1]
58-
problem_value = current_level[problem_key]
59-
60-
# Convert the problematic value to string only if it's not null.
61-
if problem_value is not None:
62-
current_level[problem_key] = str(problem_value)
63-
if logger:
64-
logger.warning("Validating converted record")
65-
return record
66-
return None
67-
return None
34+
nested_validator = Draft7Validator(nested_schema)
6835

6936
# Set up the logger
7037
logging.basicConfig(level=logging.INFO)
@@ -130,33 +97,53 @@ def test_nested_dict_with_nested_non_string():
13097
), "The 'city' should have been converted to a string."
13198
validator.validate(updated_record) # This should not raise an error
13299

133-
def test_single_level_schema_nested_dict_to_string():
134-
record = {"name": {"first": "John", "last": "Doe"}, "age": 30}
135-
try:
136-
validator.validate(record)
137-
except ValidationError as e:
138-
updated_record = handle_validation_error(record, e, logger)
139-
assert (
140-
isinstance(updated_record["name"], str)
141-
), "The 'name' should have been converted to a JSON string."
142-
assert (
143-
json.loads(updated_record["name"]) == {"first": "John", "last": "Doe"}
144-
), "The JSON string is not correct."
145-
146-
def test_single_level_schema_deeply_nested_dict_to_string():
147-
record = {"name":
148-
{"first": "John", "last": "Doe",
149-
"nicknames": {"short": "JD", "long": "Johnny"},
150-
},
151-
"age": 30,
152-
}
153-
try:
154-
validator.validate(record)
155-
except ValidationError as e:
156-
updated_record = handle_validation_error(record, e, logger)
157-
assert (
158-
isinstance(updated_record["name"], str)
159-
), "The 'name' field should have been converted to a JSON string."
160-
assert (
161-
"nicknames" in json.loads(updated_record["name"])
162-
), "The JSON string does not correctly represent the nested dict."
100+
def test_single_level_schema_nested_dict_to_string():
101+
record = {"name": {"first": "John", "last": "Doe"}, "age": 30}
102+
try:
103+
nested_validator.validate(record)
104+
except ValidationError as e:
105+
updated_record = handle_validation_error(record, e, logger)
106+
assert (
107+
isinstance(updated_record["name"], str)
108+
), "The 'name' should have been converted to a JSON string."
109+
assert (
110+
json.loads(updated_record["name"]) == {"first": "John", "last": "Doe"}
111+
), "The JSON string is not correct."
112+
113+
def test_single_level_schema_deeply_nested_dict_to_string():
114+
record = {
115+
"name": "John",
116+
"age": 30,
117+
"address": {"street": "Main", "city": {"name": "New York"}},
118+
}
119+
try:
120+
nested_validator.validate(record)
121+
except ValidationError as e:
122+
updated_record = handle_validation_error(record, e, logger)
123+
assert (
124+
isinstance(updated_record["address"], str)
125+
), "The 'address' field should have been converted to a JSON string."
126+
assert (
127+
"street" in json.loads(updated_record["address"])
128+
), "The JSON string does not correctly represent the nested dict."
129+
130+
def test_single_level_schema_deeply_nested_list_of_dicts_to_string():
131+
record = {
132+
"name": "John",
133+
"age": 30,
134+
"address": [
135+
{"street": "Main", "city": {"name": "New York"}},
136+
{"street": "Second", "city": {"name": "Los Angeles"}},
137+
],
138+
}
139+
address_str = json.dumps(record["address"])
140+
try:
141+
nested_validator.validate(record)
142+
except ValidationError as e:
143+
updated_record = handle_validation_error(record, e, logger)
144+
assert (
145+
isinstance(updated_record["address"], str)
146+
), "The 'address' field should have been converted to a JSON string."
147+
assert (
148+
updated_record["address"] == address_str
149+
), "The JSON string does not correctly represent the nested list of dicts."

0 commit comments

Comments
 (0)