Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
event_content_has_stacktrace,
filter_null_from_string,
get_stacktrace_string,
has_too_many_contributing_frames,
killswitch_enabled,
record_did_call_seer_metric,
stacktrace_exceeds_limits,
)
from sentry.services.eventstore.models import Event
from sentry.utils import metrics
Expand Down Expand Up @@ -65,7 +65,7 @@ def should_call_seer_for_grouping(
# know the other checks have passed.
or _has_empty_stacktrace_string(event, variants)
# do this after the empty stacktrace string check because it calculates the stacktrace string
or _has_too_many_contributing_frames(event, variants)
or _stacktrace_exceeds_limits(event, variants)
# **Do not add any new checks after this.** The rate limit check MUST remain the last of all
# the checks.
#
Expand Down Expand Up @@ -155,9 +155,9 @@ def _event_content_is_seer_eligible(event: Event) -> bool:
return True


def _has_too_many_contributing_frames(event: Event, variants: dict[str, BaseVariant]) -> bool:
if has_too_many_contributing_frames(event, variants, ReferrerOptions.INGEST):
record_did_call_seer_metric(event, call_made=False, blocker="excess-frames")
def _stacktrace_exceeds_limits(event: Event, variants: dict[str, BaseVariant]) -> bool:
    """
    Return True if the event's stacktrace is too long to send to Seer.

    Delegates the actual length check (frame count and token count) to
    `stacktrace_exceeds_limits` with the INGEST referrer, and, when the event is
    blocked, records a did-call-seer metric with the "stacktrace-too-long" blocker
    tag so the skip is observable.
    """
    if stacktrace_exceeds_limits(event, variants, ReferrerOptions.INGEST):
        record_did_call_seer_metric(event, call_made=False, blocker="stacktrace-too-long")
        return True

    return False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
ReferrerOptions,
event_content_has_stacktrace,
get_stacktrace_string,
has_too_many_contributing_frames,
killswitch_enabled,
stacktrace_exceeds_limits,
)
from sentry.users.models.user import User
from sentry.utils.safe import get_path
Expand Down Expand Up @@ -92,7 +92,7 @@ def get(self, request: Request, group: Group) -> Response:
if latest_event and event_content_has_stacktrace(latest_event):
variants = latest_event.get_grouping_variants(normalize_stacktraces=True)

if not has_too_many_contributing_frames(
if not stacktrace_exceeds_limits(
latest_event, variants, ReferrerOptions.SIMILAR_ISSUES_TAB
):
grouping_info = get_grouping_info_from_variants_legacy(variants)
Expand Down
8 changes: 8 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,14 @@
flags=FLAG_MODIFIABLE_BOOL,
)

# Maximum token count for stacktraces sent to Seer for similarity analysis.
# Stacktraces whose computed token count exceeds this value are blocked from
# being sent to Seer (see `stacktrace_exceeds_limits` in seer/similarity/utils.py).
register(
    "seer.similarity.max_token_count",
    type=Int,
    default=7000,
    flags=FLAG_AUTOMATOR_MODIFIABLE,  # runtime-adjustable via options automator
)

# seer nearest neighbour endpoint timeout
register(
"embeddings-grouping.seer.nearest-neighbour-timeout",
Expand Down
39 changes: 31 additions & 8 deletions src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,12 +322,20 @@ def record_did_call_seer_metric(event: Event, *, call_made: bool, blocker: str)
)


def has_too_many_contributing_frames(
def stacktrace_exceeds_limits(
event: Event | GroupEvent,
variants: dict[str, BaseVariant],
referrer: ReferrerOptions,
) -> bool:
platform = event.platform
"""
Check if a stacktrace exceeds length limits for Seer similarity analysis.

This checks both frame count and token count limits to determine if the stacktrace
is too long to send to Seer. Different platforms have different filtering behaviors:
- Platforms in EVENT_PLATFORMS_BYPASSING_FRAME_COUNT_CHECK bypass all checks
- Other platforms are checked against MAX_FRAME_COUNT and max_token_count limits
"""
platform: str = event.platform or "unknown"
shared_tags = {"referrer": referrer.value, "platform": platform}

contributing_variant, contributing_component = get_contributing_variant_and_component(variants)
Expand All @@ -354,7 +362,7 @@ def has_too_many_contributing_frames(
# truncated)
if platform in EVENT_PLATFORMS_BYPASSING_FRAME_COUNT_CHECK:
metrics.incr(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "bypass"},
)
Expand All @@ -367,15 +375,28 @@ def has_too_many_contributing_frames(

if contributing_component.frame_counts[key] > MAX_FRAME_COUNT:
metrics.incr(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "block"},
tags={**shared_tags, "outcome": "block_frames"},
)
report_token_count_metric(event, variants, "block")
report_token_count_metric(event, variants, "block_frames")
return True

# For platforms that filter by frame count, also check token count
token_count = get_token_count(event, variants, platform)
max_token_count = options.get("seer.similarity.max_token_count")

if token_count > max_token_count:
metrics.incr(
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "block_tokens"},
)
report_token_count_metric(event, variants, "block_tokens", token_count)
return True

metrics.incr(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "pass"},
)
Expand Down Expand Up @@ -499,6 +520,7 @@ def report_token_count_metric(
event: Event | GroupEvent,
variants: dict[str, BaseVariant],
outcome: str,
token_count: int | None = None,
) -> None:
"""
Calculate token count and report metrics for stacktrace token analysis.
Expand All @@ -516,7 +538,8 @@ def report_token_count_metric(

platform = event.platform or "unknown"

token_count = get_token_count(event, variants, platform)
if token_count is None:
token_count = get_token_count(event, variants, platform)

metrics.distribution(
"grouping.similarity.token_count",
Expand Down
6 changes: 3 additions & 3 deletions src/sentry/tasks/embeddings_grouping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
event_content_has_stacktrace,
filter_null_from_string,
get_stacktrace_string,
has_too_many_contributing_frames,
stacktrace_exceeds_limits,
)
from sentry.services.eventstore.models import Event
from sentry.snuba.dataset import Dataset
Expand Down Expand Up @@ -401,9 +401,9 @@ def get_events_from_nodestore(
if event and event_content_has_stacktrace(event):
variants = event.get_grouping_variants(normalize_stacktraces=True)

if has_too_many_contributing_frames(event, variants, ReferrerOptions.BACKFILL):
if stacktrace_exceeds_limits(event, variants, ReferrerOptions.BACKFILL):
invalid_event_group_ids.append(group_id)
invalid_event_reasons["excess_frames"] += 1
invalid_event_reasons["stacktrace_too_long"] += 1
continue

grouping_info = get_grouping_info_from_variants_legacy(variants)
Expand Down
77 changes: 74 additions & 3 deletions tests/sentry/grouping/seer_similarity/test_seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,21 +123,92 @@ def test_too_many_frames(

sample_rate = options.get("seer.similarity.metrics_sample_rate")
mock_metrics.incr.assert_any_call(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=sample_rate,
tags={
"platform": "java",
"referrer": "ingest",
"stacktrace_type": "system",
"outcome": "block",
"outcome": "block_frames",
},
)
mock_record_did_call_seer.assert_any_call(
new_event, call_made=False, blocker="excess-frames"
new_event, call_made=False, blocker="stacktrace-too-long"
)

mock_get_similar_issues.assert_not_called()

@patch("sentry.grouping.ingest.seer.record_did_call_seer_metric")
@patch("sentry.grouping.ingest.seer.get_seer_similar_issues")
@patch("sentry.seer.similarity.utils.metrics")
def test_too_many_tokens(
    self,
    mock_metrics: MagicMock,
    mock_get_similar_issues: MagicMock,
    mock_record_did_call_seer: MagicMock,
) -> None:
    """
    An event whose stacktrace token count exceeds `seer.similarity.max_token_count`
    is blocked from Seer: the `block_tokens` outcome metric and the
    "stacktrace-too-long" blocker are recorded, and Seer is never called.
    """
    self.project.update_option("sentry:similarity_backfill_completed", int(time()))

    # Set a very low token limit to make the test reliable and easy to exceed
    with self.options({"seer.similarity.max_token_count": 10}):
        error_type = "FailedToFetchError"
        error_value = "Charlie didn't bring the ball back"
        # Even with simple frames, the stacktrace string will exceed 10 tokens
        context_line = f"raise {error_type}('{error_value}')"
        new_event = Event(
            project_id=self.project.id,
            event_id="33312012112120120908201304152013",
            data={
                "title": f"{error_type}('{error_value}')",
                "exception": {
                    "values": [
                        {
                            "type": error_type,
                            "value": error_value,
                            "stacktrace": {
                                "frames": [
                                    {
                                        "function": f"play_fetch_{i}",
                                        "filename": f"dogpark{i}.py",
                                        "context_line": context_line,
                                    }
                                    for i in range(
                                        3
                                    )  # Just 3 frames, well under MAX_FRAME_COUNT
                                ]
                            },
                        }
                    ]
                },
                "platform": "java",
            },
        )

        new_grouphash = GroupHash.objects.create(
            project=self.project, group=new_event.group, hash=new_event.get_primary_hash()
        )
        group_hashes = list(GroupHash.objects.filter(project_id=self.project.id))
        maybe_check_seer_for_matching_grouphash(
            new_event, new_grouphash, new_event.get_grouping_variants(), group_hashes
        )

        # The token-count block should be recorded under the shared
        # stacktrace_length_filter metric with the "block_tokens" outcome
        sample_rate = options.get("seer.similarity.metrics_sample_rate")
        mock_metrics.incr.assert_any_call(
            "grouping.similarity.stacktrace_length_filter",
            sample_rate=sample_rate,
            tags={
                "platform": "java",
                "referrer": "ingest",
                "stacktrace_type": "system",
                "outcome": "block_tokens",
            },
        )
        mock_record_did_call_seer.assert_any_call(
            new_event, call_made=False, blocker="stacktrace-too-long"
        )

        # Blocked events must never reach Seer
        mock_get_similar_issues.assert_not_called()

@patch("sentry.grouping.ingest.seer.get_similarity_data_from_seer", return_value=[])
def test_too_many_frames_bypassed_platform(self, mock_get_similarity_data: MagicMock) -> None:
self.project.update_option("sentry:similarity_backfill_completed", int(time()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def test_obeys_excessive_frame_check(self) -> None:

for frame_check_result, expected_result in [(True, False), (False, True)]:
with patch(
"sentry.grouping.ingest.seer._has_too_many_contributing_frames",
"sentry.grouping.ingest.seer._stacktrace_exceeds_limits",
return_value=frame_check_result,
):
assert (
Expand Down
21 changes: 9 additions & 12 deletions tests/sentry/seer/similarity/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
filter_null_from_string,
get_stacktrace_string,
get_token_count,
has_too_many_contributing_frames,
stacktrace_exceeds_limits,
)
from sentry.services.eventstore.models import Event
from sentry.testutils.cases import TestCase
Expand Down Expand Up @@ -906,7 +906,7 @@ def test_single_exception_simple(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -925,7 +925,7 @@ def test_single_exception_bypassed_platform(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -950,7 +950,7 @@ def test_chained_exception_simple(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -976,7 +976,7 @@ def test_chained_exception_bypassed_platform(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -994,7 +994,7 @@ def test_ignores_non_contributing_frames(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is False # Not flagged as too many because only contributing frames are counted
)

Expand All @@ -1011,7 +1011,7 @@ def test_prefers_app_frames(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is False # Not flagged as too many because only in-app frames are counted
)

Expand All @@ -1031,10 +1031,7 @@ def test_uses_app_or_system_variants(self) -> None:
contributing_variant, _ = get_contributing_variant_and_component(variants)
assert contributing_variant.variant_name == expected_variant_name

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
is True
)
assert stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST) is True

def test_ignores_events_not_grouped_on_stacktrace(self) -> None:
self.event.data["platform"] = "java"
Expand All @@ -1049,7 +1046,7 @@ def test_ignores_events_not_grouped_on_stacktrace(self) -> None:
assert isinstance(contributing_variant, CustomFingerprintVariant)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is False # Not flagged as too many because it's grouped by fingerprint
)

Expand Down
Loading