Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
event_content_has_stacktrace,
filter_null_from_string,
get_stacktrace_string,
has_too_many_contributing_frames,
killswitch_enabled,
record_did_call_seer_metric,
stacktrace_exceeds_limits,
)
from sentry.services.eventstore.models import Event
from sentry.utils import metrics
Expand Down Expand Up @@ -65,7 +65,7 @@ def should_call_seer_for_grouping(
# know the other checks have passed.
or _has_empty_stacktrace_string(event, variants)
# do this after the empty stacktrace string check because it calculates the stacktrace string
or _has_too_many_contributing_frames(event, variants)
or _stacktrace_exceeds_limits(event, variants)
# **Do not add any new checks after this.** The rate limit check MUST remain the last of all
# the checks.
#
Expand Down Expand Up @@ -155,9 +155,9 @@ def _event_content_is_seer_eligible(event: Event) -> bool:
return True


def _has_too_many_contributing_frames(event: Event, variants: dict[str, BaseVariant]) -> bool:
if has_too_many_contributing_frames(event, variants, ReferrerOptions.INGEST):
record_did_call_seer_metric(event, call_made=False, blocker="excess-frames")
def _stacktrace_exceeds_limits(event: Event, variants: dict[str, BaseVariant]) -> bool:
if stacktrace_exceeds_limits(event, variants, ReferrerOptions.INGEST):
record_did_call_seer_metric(event, call_made=False, blocker="stacktrace-too-long")
return True

return False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
ReferrerOptions,
event_content_has_stacktrace,
get_stacktrace_string,
has_too_many_contributing_frames,
killswitch_enabled,
stacktrace_exceeds_limits,
)
from sentry.users.models.user import User
from sentry.utils.safe import get_path
Expand Down Expand Up @@ -92,7 +92,7 @@ def get(self, request: Request, group: Group) -> Response:
if latest_event and event_content_has_stacktrace(latest_event):
variants = latest_event.get_grouping_variants(normalize_stacktraces=True)

if not has_too_many_contributing_frames(
if not stacktrace_exceeds_limits(
latest_event, variants, ReferrerOptions.SIMILAR_ISSUES_TAB
):
grouping_info = get_grouping_info_from_variants_legacy(variants)
Expand Down
8 changes: 8 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,14 @@
flags=FLAG_MODIFIABLE_BOOL,
)

# Maximum token count for stacktraces sent to Seer for similarity analysis.
# `stacktrace_exceeds_limits` (sentry.seer.similarity.utils) reads this option and
# reports a stacktrace as exceeding limits (metric outcome "block_by_token_count")
# when its token count is strictly greater than this value.
register(
"seer.similarity.max_token_count",
type=Int,
default=7000,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

# seer nearest neighbour endpoint timeout
register(
"embeddings-grouping.seer.nearest-neighbour-timeout",
Expand Down
29 changes: 25 additions & 4 deletions src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,11 +322,19 @@ def record_did_call_seer_metric(event: Event, *, call_made: bool, blocker: str)
)


def has_too_many_contributing_frames(
def stacktrace_exceeds_limits(
event: Event | GroupEvent,
variants: dict[str, BaseVariant],
referrer: ReferrerOptions,
) -> bool:
"""
Check if a stacktrace exceeds length limits for Seer similarity analysis.

This checks both frame count and token count limits to determine if the stacktrace
is too long to send to Seer. Different platforms have different filtering behaviors:
- Platforms in EVENT_PLATFORMS_BYPASSING_FRAME_COUNT_CHECK bypass all checks
- Other platforms are checked against MAX_FRAME_COUNT and max_token_count limits
"""
platform = event.platform
shared_tags = {"referrer": referrer.value, "platform": platform}

Expand Down Expand Up @@ -354,7 +362,7 @@ def has_too_many_contributing_frames(
# truncated)
if platform in EVENT_PLATFORMS_BYPASSING_FRAME_COUNT_CHECK:
metrics.incr(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "bypass"},
)
Expand All @@ -367,15 +375,28 @@ def has_too_many_contributing_frames(

if contributing_component.frame_counts[key] > MAX_FRAME_COUNT:
metrics.incr(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "block"},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we update this outcome to block_by_frame_count or something like that?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same applies to the call to report_token_count_metric.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah, for sure — I want to differentiate between the two. This isn't even a nit; it's a real oversight. Thanks 👍

)
report_token_count_metric(event, variants, "block")
return True

# For platforms that filter by frame count, also check token count
token_count = get_token_count(event, variants, platform)
max_token_count = options.get("seer.similarity.max_token_count")

if token_count > max_token_count:
metrics.incr(
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "block_by_token_count"},
)
report_token_count_metric(event, variants, "block_by_token_count")
return True

metrics.incr(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "pass"},
)
Expand Down
4 changes: 2 additions & 2 deletions src/sentry/tasks/embeddings_grouping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
event_content_has_stacktrace,
filter_null_from_string,
get_stacktrace_string,
has_too_many_contributing_frames,
stacktrace_exceeds_limits,
)
from sentry.services.eventstore.models import Event
from sentry.snuba.dataset import Dataset
Expand Down Expand Up @@ -401,7 +401,7 @@ def get_events_from_nodestore(
if event and event_content_has_stacktrace(event):
variants = event.get_grouping_variants(normalize_stacktraces=True)

if has_too_many_contributing_frames(event, variants, ReferrerOptions.BACKFILL):
if stacktrace_exceeds_limits(event, variants, ReferrerOptions.BACKFILL):
invalid_event_group_ids.append(group_id)
invalid_event_reasons["excess_frames"] += 1
continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def test_obeys_excessive_frame_check(self) -> None:

for frame_check_result, expected_result in [(True, False), (False, True)]:
with patch(
"sentry.grouping.ingest.seer._has_too_many_contributing_frames",
"sentry.grouping.ingest.seer._stacktrace_exceeds_limits",
return_value=frame_check_result,
):
assert (
Expand Down
21 changes: 9 additions & 12 deletions tests/sentry/seer/similarity/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
filter_null_from_string,
get_stacktrace_string,
get_token_count,
has_too_many_contributing_frames,
stacktrace_exceeds_limits,
)
from sentry.services.eventstore.models import Event
from sentry.testutils.cases import TestCase
Expand Down Expand Up @@ -906,7 +906,7 @@ def test_single_exception_simple(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -925,7 +925,7 @@ def test_single_exception_bypassed_platform(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -950,7 +950,7 @@ def test_chained_exception_simple(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -976,7 +976,7 @@ def test_chained_exception_bypassed_platform(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -994,7 +994,7 @@ def test_ignores_non_contributing_frames(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is False # Not flagged as too many because only contributing frames are counted
)

Expand All @@ -1011,7 +1011,7 @@ def test_prefers_app_frames(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is False # Not flagged as too many because only in-app frames are counted
)

Expand All @@ -1031,10 +1031,7 @@ def test_uses_app_or_system_variants(self) -> None:
contributing_variant, _ = get_contributing_variant_and_component(variants)
assert contributing_variant.variant_name == expected_variant_name

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
is True
)
assert stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST) is True

def test_ignores_events_not_grouped_on_stacktrace(self) -> None:
self.event.data["platform"] = "java"
Expand All @@ -1049,7 +1046,7 @@ def test_ignores_events_not_grouped_on_stacktrace(self) -> None:
assert isinstance(contributing_variant, CustomFingerprintVariant)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is False # Not flagged as too many because it's grouped by fingerprint
)

Expand Down
Loading