Skip to content

Commit 887eb86

Browse files
authored
Add unit test for date parsing and add manual trimming for invalid date types (#141)
* Fix date parsing * Fix date parsing * Update version to 0.2.3 * Fix lint
1 parent 278d94c commit 887eb86

File tree

6 files changed

+133
-3
lines changed

6 files changed

+133
-3
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "shaped-target-clickhouse"
3-
version = "0.2.2"
3+
version = "0.2.3"
44
description = "`target-clickhouse` is a Singer target for clickhouse, built with the Meltano Singer SDK."
55
readme = "README.md"
66
authors = ["Ben Theunissen"]

target_clickhouse/sinks.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,15 @@
99
import simplejson as json
1010
import sqlalchemy
1111
from pendulum import now
12+
from singer_sdk.helpers._compat import (
13+
date_fromisoformat,
14+
datetime_fromisoformat,
15+
time_fromisoformat,
16+
)
1217
from singer_sdk.helpers._typing import (
1318
DatetimeErrorTreatmentEnum,
19+
get_datelike_property_type,
20+
handle_invalid_timestamp_in_record,
1421
)
1522
from singer_sdk.sinks import SQLSink
1623
from sqlalchemy.sql.expression import bindparam
@@ -174,10 +181,64 @@ def _validate_and_parse(self, record: dict) -> dict:
174181
except jsonschema_exceptions.ValidationError as e:
175182
if self.logger:
176183
self.logger.exception(f"Record failed validation: {record}")
177-
raise e # noqa: RERAISES
184+
raise e # : RERAISES
178185

179186
return record
180187

188+
def _parse_timestamps_in_record(
    self,
    record: dict,
    schema: dict,
    treatment: DatetimeErrorTreatmentEnum,
) -> None:
    """Convert date-like string fields of ``record`` in place.

    Each field whose schema entry is recognised as a date, time or datetime
    property is parsed from its ISO string form and written back to
    ``record``. For ``date`` fields, anything from the first ``"T"`` onwards
    is cut off before parsing, and a warning is logged when that happens.
    Fields without a schema entry are reported with a warning and left
    untouched; ``None`` values are written back unchanged.

    Args:
        record: Individual record in the stream. Mutated in place.
        schema: JSON schema of the stream; its ``"properties"`` mapping is
            consulted for every field of ``record``.
        treatment: Passed through to ``handle_invalid_timestamp_in_record``
            when parsing raises ``ValueError``; selects the repair strategy
            (MAX, NULL, or ERROR).
    """
    for field_name, raw_value in record.items():
        if field_name not in schema["properties"]:
            self.logger.warning("No schema for record field '%s'", field_name)
            continue

        datelike_kind = get_datelike_property_type(
            schema["properties"][field_name],
        )
        if not datelike_kind:
            # Not a date/time/datetime property: nothing to convert.
            continue

        parsed = raw_value
        if raw_value is not None:
            try:
                if datelike_kind == "time":
                    parsed = time_fromisoformat(parsed)
                elif datelike_kind == "date":
                    if "T" in parsed:
                        # Keep only the part before the first "T" so that a
                        # datetime string can be read as a plain date.
                        parsed = parsed.split("T", 1)[0]
                        self.logger.warning(
                            "Trimmed time value from date field '%s': %s",
                            field_name,
                            parsed,
                        )
                    parsed = date_fromisoformat(parsed)
                else:
                    parsed = datetime_fromisoformat(parsed)
            except ValueError as ex:
                # Delegate repair (or re-raise) to the shared helper; for a
                # trimmed date, the trimmed string is what gets reported.
                parsed = handle_invalid_timestamp_in_record(
                    record,
                    [field_name],
                    parsed,
                    datelike_kind,
                    ex,
                    treatment,
                    self.logger,
                )
        record[field_name] = parsed
240+
241+
181242
def pre_validate_for_string_type(
182243
record: dict,
183244
schema: dict,

tests/conftest.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
11
"""Test Configuration."""
2+
from pathlib import Path
3+
4+
from singer_sdk.testing.templates import TargetFileTestTemplate
25

36
pytest_plugins = ()
7+
8+
class TargetClickhouseFileTestTemplate(TargetFileTestTemplate):
    """File-backed target test template for this test package.

    Subclass this for target tests whose input messages are read from a
    ``.singer`` file stored alongside this module.
    """

    @property
    def singer_filepath(self):
        """Locate the Singer JSONL messages file for this test.

        The file is expected at ``target_test_streams/<name>.singer`` inside
        the directory that contains this module, where ``<name>`` is
        ``self.name``.

        Returns:
            The expected Path to this test's singer file.
        """
        tests_dir = Path(__file__).resolve().parent
        streams_dir = tests_dir / "target_test_streams"
        return streams_dir / f"{self.name}.singer"

tests/target_test_cases.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
2+
import datetime
3+
import logging
4+
5+
from singer_sdk.testing.suites import TestSuite
6+
from sqlalchemy import text
7+
8+
from tests.conftest import TargetClickhouseFileTestTemplate
9+
10+
logger = logging.getLogger(__name__)
11+
12+
class TestDateTypeTargetClickhouse(TargetClickhouseFileTestTemplate):
    """Check that date-typed values can be ingested into Clickhouse."""

    name = "date_type"

    def validate(self) -> None:
        """Read back the ``date_type`` table and check the stored dates.

        Rows are matched by comparing their first column with the expected
        record id; the second column must equal the expected ``datetime.date``.
        """
        connector = self.target.default_sink_class.connector_class(
            self.target.config,
        )
        query = text("SELECT * FROM date_type")
        rows = connector.connection.execute(statement=query).fetchall()

        def first_row_with_id(record_id):
            # First row whose leading column equals record_id; raises
            # StopIteration when no such row exists.
            return next(row for row in rows if row[0] == record_id)

        record_1 = first_row_with_id(1)
        assert record_1[1] == datetime.date(2024, 3, 15)

        record_2 = first_row_with_id(2)
        assert record_2[1] == datetime.date(2024, 3, 16)
33+
34+
# Suite of the custom target tests defined in this module.
custom_target_test_suite = TestSuite(
    kind="target",
    tests=[TestDateTypeTargetClickhouse],
)
tests/target_test_streams/date_type.singer

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{"type": "SCHEMA", "stream": "date_type", "key_properties": ["id"], "schema": {"required": ["id"], "type": "object", "properties": {"id": {"type": "integer"}, "date": {"format": "date", "type": [ "null", "string" ] }}}}
2+
{"type": "RECORD", "stream": "date_type", "record": {"id": 1, "date": "2024-03-15"}}
3+
{"type": "RECORD", "stream": "date_type", "record": {"id": 2, "date": "2024-03-16T00:00:00+00:00"}}
4+
{"type": "RECORD", "stream": "date_type", "record": {"id": 3, "date": null}}

tests/test_core.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from singer_sdk.testing import get_target_test_class
88

99
from target_clickhouse.target import TargetClickhouse
10+
from tests.target_test_cases import custom_target_test_suite
1011

1112
TEST_CONFIG: dict[str, t.Any] = {
1213
"sqlalchemy_url": "clickhouse+http://default:@localhost:18123",
@@ -39,16 +40,20 @@
3940
StandardTargetTests = get_target_test_class(
4041
target_class=TargetClickhouse,
4142
config=TEST_CONFIG,
43+
custom_suites=[custom_target_test_suite],
4244
)
4345

4446

45-
class TestStandardTargetClickhouse(StandardTargetTests): # type: ignore[misc, valid-type]
47+
class TestStandardTargetClickhouse(
48+
StandardTargetTests, # type: ignore[misc, valid-type]
49+
):
4650
"""Standard Target Tests."""
4751

4852

4953
SpreadTargetTests = get_target_test_class(
5054
target_class=TargetClickhouse,
5155
config=TEST_CONFIG_SPREAD,
56+
custom_suites=[custom_target_test_suite],
5257
)
5358

5459

0 commit comments

Comments
 (0)