Skip to content

Commit 340751e

Browse files
authored
Rework validation exception logic with tests (#67)
* Rework validation with tests * Fix * Fix version
1 parent d0b2aab commit 340751e

File tree

3 files changed

+208
-15
lines changed

3 files changed

+208
-15
lines changed

pyproject.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "shaped-target-clickhouse"
3-
version = "0.1.2"
3+
version = "0.1.3"
44
description = "`target-clickhouse` is a Singer target for clickhouse, built with the Meltano Singer SDK."
55
readme = "README.md"
66
authors = ["Ben Theunissen"]
@@ -43,6 +43,11 @@ ignore = [
4343
"D401", # First line should be in imperative mood
4444
"D407", # Missing dashed underline after section
4545
"FA100", # Forbidden import
46+
"S101", # Use of assert detected
47+
"G004", # Logging statement uses string formatting
48+
"TCH002",
49+
"TCH003",
50+
"ANN202",
4651
]
4752
select = ["ALL"]
4853
src = ["target_clickhouse"]

target_clickhouse/sinks.py

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22

33
from __future__ import annotations
44

5+
import logging
56
from typing import Any, Iterable
67

78
import jsonschema.exceptions as jsonschema_exceptions
89
import simplejson as json
910
import sqlalchemy
11+
from jsonschema import ValidationError
1012
from pendulum import now
1113
from singer_sdk.sinks import SQLSink
1214
from sqlalchemy.sql.expression import bindparam
@@ -150,24 +152,48 @@ def _validate_and_parse(self, record: dict) -> dict:
150152
try:
151153
self._validator.validate(record)
152154
except jsonschema_exceptions.ValidationError as e:
153-
if "is not of type" in e.message and "'string'" in e.message:
154-
self.logger.warning(
155-
"Received non valid record for string type, "
156-
"attempting forced conversion to string",
157-
)
158-
for key, value in record.items():
159-
if isinstance(value, dict):
160-
record[key] = json.dumps(value)
161-
elif not isinstance(value, str):
162-
record[key] = str(value)
163-
self.logger.warning("Validating converted record")
164-
self._validator.validate(record)
165-
else:
166-
raise
155+
record = handle_validation_error(record, e, self.logger)
156+
self._validator.validate(record)
167157

168158
self._parse_timestamps_in_record(
169159
record=record,
170160
schema=self.schema,
171161
treatment=self.datetime_error_treatment,
172162
)
173163
return record
164+
165+
166+
def handle_validation_error(record,
167+
e: ValidationError,
168+
logger: logging.Logger | None = None):
169+
if "'string'" in e.message:
170+
if logger:
171+
logger.warning(
172+
f"Received non valid record for types 'string', {e.path}, "
173+
f"attempting conversion for record, {record}",
174+
)
175+
176+
# e.path is deque which is iterable, we convert it to list to access by index
177+
key_path = list(e.path)
178+
179+
# Access the problematic value using the key_path
180+
current_level = record
181+
for key in key_path[:-1]: # Go to the parent level of the problematic key
182+
current_level = current_level[key]
183+
184+
problem_key = key_path[-1]
185+
problem_value = current_level[problem_key]
186+
187+
# Convert the problematic value to string only if it's not null
188+
if problem_value is not None:
189+
if isinstance(problem_value, dict):
190+
# Convert the dict to JSON string
191+
current_level[problem_key] = json.dumps(problem_value)
192+
else:
193+
current_level[problem_key] = str(problem_value)
194+
195+
if logger:
196+
logger.warning("Validating converted record")
197+
return record
198+
return None
199+
return None

tests/test_validation.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import json
2+
import logging
3+
from typing import Optional
4+
5+
from jsonschema import Draft7Validator, ValidationError
6+
7+
# Schema that allows a field to be either a string or null
8+
schema = {
9+
"type": "object",
10+
"properties": {
11+
"name": {"type": ["string", "null"]},
12+
"age": {"type": "number"},
13+
},
14+
"required": ["name", "age"],
15+
}
16+
17+
nested_schema = {
18+
"type": "object",
19+
"properties": {
20+
"name": {"type": ["string", "null"]},
21+
"age": {"type": "number"},
22+
"address": {
23+
"type": "object",
24+
"properties": {
25+
"street": {"type": "string"},
26+
"city": {"type": "string"},
27+
},
28+
"required": ["street", "city"],
29+
},
30+
},
31+
"required": ["name", "age", "address"],
32+
}
33+
34+
35+
# Validator instance
36+
validator = Draft7Validator(schema)
37+
38+
# Function to handle validation errors
39+
def handle_validation_error(record,
40+
e: ValidationError,
41+
logger: Optional[logging.Logger] = None):
42+
if "'string'" in e.message:
43+
if logger:
44+
logger.warning(
45+
f"Received non valid record for types 'string', {e.path}, "
46+
f"attempting conversion for record, {record}",
47+
)
48+
49+
50+
key_path = list(e.path)
51+
52+
# Access the problematic value using the key_path
53+
current_level = record
54+
for key in key_path[:-1]: # Go to parent of the problematic key
55+
current_level = current_level[key]
56+
57+
problem_key = key_path[-1]
58+
problem_value = current_level[problem_key]
59+
60+
# Convert the problematic value to string only if it's not null.
61+
if problem_value is not None:
62+
current_level[problem_key] = str(problem_value)
63+
if logger:
64+
logger.warning("Validating converted record")
65+
return record
66+
return None
67+
return None
68+
69+
# Set up the logger
70+
logging.basicConfig(level=logging.INFO)
71+
logger = logging.getLogger(__name__)
72+
73+
# Test cases
74+
def test_validation_string_conversion():
75+
record = {"name": 123, "age": 30}
76+
try:
77+
validator.validate(record)
78+
except ValidationError as e:
79+
updated_record = handle_validation_error(record, e, logger)
80+
assert (
81+
updated_record["name"] == "123"
82+
), "The 'name' should have been converted to a string."
83+
validator.validate(updated_record) # This should not raise an error
84+
85+
def test_validation_no_error_raised():
86+
record = {"name": "John", "age": 30}
87+
# This should not raise an error, hence no need to handle validation
88+
validator.validate(record) # This should not raise an error
89+
90+
def test_validation_null_allowed():
91+
record = {"name": None, "age": 30}
92+
try:
93+
validator.validate(record)
94+
except ValidationError as e:
95+
updated_record = handle_validation_error(record, e, logger)
96+
assert (
97+
updated_record is None
98+
), "The 'name' field is null and should be valid."
99+
100+
def test_validation_non_string_non_null_field():
101+
record = {"name": {"first": "John", "last": "Doe"}, "age": 30}
102+
try:
103+
validator.validate(record)
104+
except ValidationError as e:
105+
updated_record = handle_validation_error(record, e, logger)
106+
assert (
107+
isinstance(updated_record["name"], str)
108+
), "The 'name' should have been converted to a string."
109+
110+
def test_nested_dict_string_conversion():
111+
record = {"name": "John", "age": 30, "address": {"street": 123, "city": "New York"}}
112+
try:
113+
validator.validate(record)
114+
except ValidationError as e:
115+
updated_record = handle_validation_error(record, e, logger)
116+
assert (
117+
updated_record["address"]["street"] == "123"
118+
), "The 'street' should have been converted to a string."
119+
validator.validate(updated_record) # This should not raise an error
120+
121+
def test_nested_dict_with_nested_non_string():
122+
record = {"name": "John", "age": 30,
123+
"address": {"street": "Main", "city": {"name": "New York"}}}
124+
try:
125+
validator.validate(record)
126+
except ValidationError as e:
127+
updated_record = handle_validation_error(record, e, logger)
128+
assert (
129+
isinstance(updated_record["address"]["city"], str)
130+
), "The 'city' should have been converted to a string."
131+
validator.validate(updated_record) # This should not raise an error
132+
133+
def test_single_level_schema_nested_dict_to_string():
134+
record = {"name": {"first": "John", "last": "Doe"}, "age": 30}
135+
try:
136+
validator.validate(record)
137+
except ValidationError as e:
138+
updated_record = handle_validation_error(record, e, logger)
139+
assert (
140+
isinstance(updated_record["name"], str)
141+
), "The 'name' should have been converted to a JSON string."
142+
assert (
143+
json.loads(updated_record["name"]) == {"first": "John", "last": "Doe"}
144+
), "The JSON string is not correct."
145+
146+
def test_single_level_schema_deeply_nested_dict_to_string():
147+
record = {"name":
148+
{"first": "John", "last": "Doe",
149+
"nicknames": {"short": "JD", "long": "Johnny"},
150+
},
151+
"age": 30,
152+
}
153+
try:
154+
validator.validate(record)
155+
except ValidationError as e:
156+
updated_record = handle_validation_error(record, e, logger)
157+
assert (
158+
isinstance(updated_record["name"], str)
159+
), "The 'name' field should have been converted to a JSON string."
160+
assert (
161+
"nicknames" in json.loads(updated_record["name"])
162+
), "The JSON string does not correctly represent the nested dict."

0 commit comments

Comments
 (0)