diff --git a/integration_tests/tests/adapter_query_runner.py b/integration_tests/tests/adapter_query_runner.py index 6ac9d96ff..c45cbb83a 100644 --- a/integration_tests/tests/adapter_query_runner.py +++ b/integration_tests/tests/adapter_query_runner.py @@ -52,9 +52,14 @@ def _serialize_value(val: Any) -> Any: * Everything else is returned unchanged. """ if isinstance(val, Decimal): - # Match the Jinja macro: normalize, then int or float + # Match the Jinja macro: normalize, then int or float. + # Note: for special values (Infinity, NaN), as_tuple().exponent is a + # string ('F' or 'n'), not an int — convert those directly to float. normalized = val.normalize() - if normalized.as_tuple().exponent >= 0: + exponent = normalized.as_tuple().exponent + if isinstance(exponent, str): + return float(normalized) + if exponent >= 0: return int(normalized) return float(normalized) if isinstance(val, (datetime, date, time)): diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py index 51d7a05b0..c68fc62fe 100644 --- a/integration_tests/tests/test_dimension_anomalies.py +++ b/integration_tests/tests/test_dimension_anomalies.py @@ -315,3 +315,83 @@ def test_anomaly_in_detection_period( ) assert test_result["status"] == expected_status + + +def test_dimension_anomalies_alert_description_few_failures( + test_id: str, dbt_project: DbtProject +): + """When ≤5 dimension values fail, description shows each one's anomaly details.""" + utc_today = datetime.utcnow().date() + test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1)) + + # 3 dimension values all spike on test_date (training: 1/day, test: 10/day) + anomalous_dimensions = ["Batman", "Superman", "Spiderman"] + + data: List[Dict[str, Any]] = [ + {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": hero} + for hero in anomalous_dimensions + for _ in range(10) + ] + data += [ + {TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT), "superhero": hero} + for cur_date in training_dates + for hero in anomalous_dimensions + ] + + test_args = { + "timestamp_column": TIMESTAMP_COLUMN, + "dimensions": ["superhero"], + "sensitivity": 2, + } + test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data) + assert test_result["status"] == "fail" + + description = test_result["test_results_description"] + # Each failing dimension value should appear in the description + for hero in anomalous_dimensions: + assert hero in description, f"Expected '{hero}' in description: {description}" + # Should NOT show the high-volume summary message + assert "dimension values are anomalous" not in description + + +def test_dimension_anomalies_alert_description_many_failures( + test_id: str, dbt_project: DbtProject +): + """When >5 dimension values fail, description shows a count summary.""" + utc_today = datetime.utcnow().date() + test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1)) + + # 6 dimension values all spike on test_date (>5 threshold) + anomalous_dimensions = [ + "Batman", + "Superman", + "Spiderman", + "IronMan", + "Thor", + "Hulk", + ] + + data: List[Dict[str, Any]] = [ + {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": hero} + for hero in anomalous_dimensions + for _ in range(10) + ] + data += [ + {TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT), "superhero": hero} + for cur_date in training_dates + for hero in anomalous_dimensions + ] + + test_args = { + "timestamp_column": TIMESTAMP_COLUMN, + "dimensions": ["superhero"], + "sensitivity": 2, + } + test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data) + assert test_result["status"] == "fail" + + description = test_result["test_results_description"] + # Should show the count summary for many failures + assert ( + "dimension values are anomalous" in description + ), f"Expected summary message in description: {description}" diff --git a/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql b/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql index ac62f3d4a..6d173a580 100644 --- a/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql +++ b/macros/edr/data_monitoring/anomaly_detection/store_anomaly_test_results.sql @@ -74,23 +74,37 @@ and upper(column_name) = upper({{ elementary.const_as_string(column_name) }}) {%- endif %} {%- endset -%} - {% set test_results_description %} - {% if rows_with_score %} - {{ elementary.insensitive_get_dict_value(rows_with_score[-1], 'anomaly_description') }} - {% else %} - Not enough data to calculate anomaly score. - {% endif %} - {% endset %} {% set failures = namespace(data=0) %} {% set filtered_anomaly_scores_rows = [] %} + {% set anomalous_rows = [] %} {% for row in anomaly_scores_rows %} {% if row.anomaly_score is not none %} {% do filtered_anomaly_scores_rows.append(row) %} {% if row.is_anomalous %} {% set failures.data = failures.data + 1 %} + {% do anomalous_rows.append(row) %} {% endif %} {% endif %} {% endfor %} + {%- set max_dimension_alerts = 5 -%} + {% set test_results_description %} + {%- if rows_with_score -%} + {%- set sample_row = rows_with_score[-1] -%} + {%- set row_dimension = elementary.insensitive_get_dict_value(sample_row, "dimension") -%} + {%- if row_dimension is not none and anomalous_rows | length > 0 -%} + {%- if anomalous_rows | length > max_dimension_alerts -%} + {%- set remaining = (anomalous_rows | length) - max_dimension_alerts -%} + {{ anomalous_rows | length }} dimension values are anomalous. Showing first {{ max_dimension_alerts }}: {% for row in anomalous_rows[:max_dimension_alerts] %}{{ elementary.insensitive_get_dict_value(row, "dimension_value") }}{% if not loop.last %}, {% endif %}{% endfor %}, and {{ remaining }} more. + {%- else -%} + {% for row in anomalous_rows %}{{ elementary.insensitive_get_dict_value(row, "anomaly_description") }}{% if not loop.last %} | {% endif %}{% endfor %} + {%- endif -%} + {%- else -%} + {{ elementary.insensitive_get_dict_value(rows_with_score[-1], "anomaly_description") }} + {%- endif -%} + {%- else -%} + Not enough data to calculate anomaly score. + {%- endif -%} + {% endset %} {% set test_result_dict = { "id": elementary.insensitive_get_dict_value(latest_row, "id"), "data_issue_id": elementary.insensitive_get_dict_value(