Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
event_content_has_stacktrace,
filter_null_from_string,
get_stacktrace_string,
has_too_many_contributing_frames,
killswitch_enabled,
record_did_call_seer_metric,
stacktrace_exceeds_limits,
)
from sentry.services.eventstore.models import Event
from sentry.utils import metrics
Expand Down Expand Up @@ -65,7 +65,7 @@ def should_call_seer_for_grouping(
# know the other checks have passed.
or _has_empty_stacktrace_string(event, variants)
# do this after the empty stacktrace string check because it calculates the stacktrace string
or _has_too_many_contributing_frames(event, variants)
or _stacktrace_exceeds_limits(event, variants)
# **Do not add any new checks after this.** The rate limit check MUST remain the last of all
# the checks.
#
Expand Down Expand Up @@ -155,9 +155,9 @@ def _event_content_is_seer_eligible(event: Event) -> bool:
return True


def _has_too_many_contributing_frames(event: Event, variants: dict[str, BaseVariant]) -> bool:
if has_too_many_contributing_frames(event, variants, ReferrerOptions.INGEST):
record_did_call_seer_metric(event, call_made=False, blocker="excess-frames")
def _stacktrace_exceeds_limits(event: Event, variants: dict[str, BaseVariant]) -> bool:
    """
    Return True if the event's stacktrace is too long to send to Seer.

    Delegates the actual length check (frame count and token count) to
    `stacktrace_exceeds_limits` with the INGEST referrer, and, when the event is
    blocked, records a did-call-seer metric with the "stacktrace-too-long" blocker
    tag so the skip is observable.
    """
    if stacktrace_exceeds_limits(event, variants, ReferrerOptions.INGEST):
        record_did_call_seer_metric(event, call_made=False, blocker="stacktrace-too-long")
        return True

    return False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
ReferrerOptions,
event_content_has_stacktrace,
get_stacktrace_string,
has_too_many_contributing_frames,
killswitch_enabled,
stacktrace_exceeds_limits,
)
from sentry.users.models.user import User
from sentry.utils.safe import get_path
Expand Down Expand Up @@ -92,7 +92,7 @@ def get(self, request: Request, group: Group) -> Response:
if latest_event and event_content_has_stacktrace(latest_event):
variants = latest_event.get_grouping_variants(normalize_stacktraces=True)

if not has_too_many_contributing_frames(
if not stacktrace_exceeds_limits(
latest_event, variants, ReferrerOptions.SIMILAR_ISSUES_TAB
):
grouping_info = get_grouping_info_from_variants_legacy(variants)
Expand Down
8 changes: 8 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,14 @@
flags=FLAG_MODIFIABLE_BOOL,
)

# Maximum token count for stacktraces sent to Seer for similarity analysis.
# Stacktraces whose computed token count exceeds this value are blocked from
# being sent to Seer (see `stacktrace_exceeds_limits` in seer/similarity/utils.py).
register(
    "seer.similarity.max_token_count",
    type=Int,
    default=7000,
    flags=FLAG_AUTOMATOR_MODIFIABLE,  # runtime-adjustable via options automator
)

# seer nearest neighbour endpoint timeout
register(
"embeddings-grouping.seer.nearest-neighbour-timeout",
Expand Down
39 changes: 31 additions & 8 deletions src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,12 +322,20 @@ def record_did_call_seer_metric(event: Event, *, call_made: bool, blocker: str)
)


def has_too_many_contributing_frames(
def stacktrace_exceeds_limits(
event: Event | GroupEvent,
variants: dict[str, BaseVariant],
referrer: ReferrerOptions,
) -> bool:
platform = event.platform
"""
Check if a stacktrace exceeds length limits for Seer similarity analysis.

This checks both frame count and token count limits to determine if the stacktrace
is too long to send to Seer. Different platforms have different filtering behaviors:
- Platforms in EVENT_PLATFORMS_BYPASSING_FRAME_COUNT_CHECK bypass all checks
- Other platforms are checked against MAX_FRAME_COUNT and max_token_count limits
"""
platform: str = event.platform or "unknown"
shared_tags = {"referrer": referrer.value, "platform": platform}

contributing_variant, contributing_component = get_contributing_variant_and_component(variants)
Expand All @@ -354,7 +362,7 @@ def has_too_many_contributing_frames(
# truncated)
if platform in EVENT_PLATFORMS_BYPASSING_FRAME_COUNT_CHECK:
metrics.incr(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "bypass"},
)
Expand All @@ -367,15 +375,28 @@ def has_too_many_contributing_frames(

if contributing_component.frame_counts[key] > MAX_FRAME_COUNT:
metrics.incr(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "block"},
tags={**shared_tags, "outcome": "block_frames"},
)
report_token_count_metric(event, variants, "block")
report_token_count_metric(event, variants, "block_frames")
return True

# For platforms that filter by frame count, also check token count
token_count = get_token_count(event, variants, platform)
max_token_count = options.get("seer.similarity.max_token_count")

if token_count > max_token_count:
metrics.incr(
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "block_tokens"},
)
report_token_count_metric(event, variants, "block_tokens", token_count)
return True

metrics.incr(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "pass"},
)
Expand Down Expand Up @@ -499,6 +520,7 @@ def report_token_count_metric(
event: Event | GroupEvent,
variants: dict[str, BaseVariant],
outcome: str,
token_count: int | None = None,
) -> None:
"""
Calculate token count and report metrics for stacktrace token analysis.
Expand All @@ -516,7 +538,8 @@ def report_token_count_metric(

platform = event.platform or "unknown"

token_count = get_token_count(event, variants, platform)
if token_count is None:
token_count = get_token_count(event, variants, platform)

metrics.distribution(
"grouping.similarity.token_count",
Expand Down
6 changes: 3 additions & 3 deletions src/sentry/tasks/embeddings_grouping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
event_content_has_stacktrace,
filter_null_from_string,
get_stacktrace_string,
has_too_many_contributing_frames,
stacktrace_exceeds_limits,
)
from sentry.services.eventstore.models import Event
from sentry.snuba.dataset import Dataset
Expand Down Expand Up @@ -401,9 +401,9 @@ def get_events_from_nodestore(
if event and event_content_has_stacktrace(event):
variants = event.get_grouping_variants(normalize_stacktraces=True)

if has_too_many_contributing_frames(event, variants, ReferrerOptions.BACKFILL):
if stacktrace_exceeds_limits(event, variants, ReferrerOptions.BACKFILL):
invalid_event_group_ids.append(group_id)
invalid_event_reasons["excess_frames"] += 1
invalid_event_reasons["stacktrace_too_long"] += 1
continue

grouping_info = get_grouping_info_from_variants_legacy(variants)
Expand Down
77 changes: 74 additions & 3 deletions tests/sentry/grouping/seer_similarity/test_seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,21 +123,92 @@ def test_too_many_frames(

sample_rate = options.get("seer.similarity.metrics_sample_rate")
mock_metrics.incr.assert_any_call(
"grouping.similarity.frame_count_filter",
"grouping.similarity.stacktrace_length_filter",
sample_rate=sample_rate,
tags={
"platform": "java",
"referrer": "ingest",
"stacktrace_type": "system",
"outcome": "block",
"outcome": "block_frames",
},
)
mock_record_did_call_seer.assert_any_call(
new_event, call_made=False, blocker="excess-frames"
new_event, call_made=False, blocker="stacktrace-too-long"
)

mock_get_similar_issues.assert_not_called()

@patch("sentry.grouping.ingest.seer.record_did_call_seer_metric")
@patch("sentry.grouping.ingest.seer.get_seer_similar_issues")
@patch("sentry.seer.similarity.utils.metrics")
def test_too_many_tokens(
    self,
    mock_metrics: MagicMock,
    mock_get_similar_issues: MagicMock,
    mock_record_did_call_seer: MagicMock,
) -> None:
    """
    An event whose stacktrace token count exceeds `seer.similarity.max_token_count`
    is blocked from Seer: the `block_tokens` outcome metric and the
    "stacktrace-too-long" blocker are recorded, and Seer is never called.
    """
    self.project.update_option("sentry:similarity_backfill_completed", int(time()))

    # Set a very low token limit to make the test reliable and easy to exceed
    with self.options({"seer.similarity.max_token_count": 10}):
        error_type = "FailedToFetchError"
        error_value = "Charlie didn't bring the ball back"
        # Even with simple frames, the stacktrace string will exceed 10 tokens
        context_line = f"raise {error_type}('{error_value}')"
        new_event = Event(
            project_id=self.project.id,
            event_id="33312012112120120908201304152013",
            data={
                "title": f"{error_type}('{error_value}')",
                "exception": {
                    "values": [
                        {
                            "type": error_type,
                            "value": error_value,
                            "stacktrace": {
                                "frames": [
                                    {
                                        "function": f"play_fetch_{i}",
                                        "filename": f"dogpark{i}.py",
                                        "context_line": context_line,
                                    }
                                    for i in range(
                                        3
                                    )  # Just 3 frames, well under MAX_FRAME_COUNT
                                ]
                            },
                        }
                    ]
                },
                "platform": "java",
            },
        )

        new_grouphash = GroupHash.objects.create(
            project=self.project, group=new_event.group, hash=new_event.get_primary_hash()
        )
        group_hashes = list(GroupHash.objects.filter(project_id=self.project.id))
        maybe_check_seer_for_matching_grouphash(
            new_event, new_grouphash, new_event.get_grouping_variants(), group_hashes
        )

        # The token-count block should be recorded under the shared
        # stacktrace_length_filter metric with the "block_tokens" outcome
        sample_rate = options.get("seer.similarity.metrics_sample_rate")
        mock_metrics.incr.assert_any_call(
            "grouping.similarity.stacktrace_length_filter",
            sample_rate=sample_rate,
            tags={
                "platform": "java",
                "referrer": "ingest",
                "stacktrace_type": "system",
                "outcome": "block_tokens",
            },
        )
        mock_record_did_call_seer.assert_any_call(
            new_event, call_made=False, blocker="stacktrace-too-long"
        )

        # Blocked events must never reach Seer
        mock_get_similar_issues.assert_not_called()

@patch("sentry.grouping.ingest.seer.get_similarity_data_from_seer", return_value=[])
def test_too_many_frames_bypassed_platform(self, mock_get_similarity_data: MagicMock) -> None:
self.project.update_option("sentry:similarity_backfill_completed", int(time()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def test_obeys_excessive_frame_check(self) -> None:

for frame_check_result, expected_result in [(True, False), (False, True)]:
with patch(
"sentry.grouping.ingest.seer._has_too_many_contributing_frames",
"sentry.grouping.ingest.seer._stacktrace_exceeds_limits",
return_value=frame_check_result,
):
assert (
Expand Down
21 changes: 9 additions & 12 deletions tests/sentry/seer/similarity/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
filter_null_from_string,
get_stacktrace_string,
get_token_count,
has_too_many_contributing_frames,
stacktrace_exceeds_limits,
)
from sentry.services.eventstore.models import Event
from sentry.testutils.cases import TestCase
Expand Down Expand Up @@ -906,7 +906,7 @@ def test_single_exception_simple(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -925,7 +925,7 @@ def test_single_exception_bypassed_platform(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -950,7 +950,7 @@ def test_chained_exception_simple(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -976,7 +976,7 @@ def test_chained_exception_bypassed_platform(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is expected_result
)

Expand All @@ -994,7 +994,7 @@ def test_ignores_non_contributing_frames(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is False # Not flagged as too many because only contributing frames are counted
)

Expand All @@ -1011,7 +1011,7 @@ def test_prefers_app_frames(self) -> None:
variants = self.event.get_grouping_variants(normalize_stacktraces=True)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is False # Not flagged as too many because only in-app frames are counted
)

Expand All @@ -1031,10 +1031,7 @@ def test_uses_app_or_system_variants(self) -> None:
contributing_variant, _ = get_contributing_variant_and_component(variants)
assert contributing_variant.variant_name == expected_variant_name

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
is True
)
assert stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST) is True

def test_ignores_events_not_grouped_on_stacktrace(self) -> None:
self.event.data["platform"] = "java"
Expand All @@ -1049,7 +1046,7 @@ def test_ignores_events_not_grouped_on_stacktrace(self) -> None:
assert isinstance(contributing_variant, CustomFingerprintVariant)

assert (
has_too_many_contributing_frames(self.event, variants, ReferrerOptions.INGEST)
stacktrace_exceeds_limits(self.event, variants, ReferrerOptions.INGEST)
is False # Not flagged as too many because it's grouped by fingerprint
)

Expand Down
Loading