Skip to content

Commit d0ece3c

Browse files
committed
increase limit, introduce randomness
1 parent 7d243a7 commit d0ece3c

File tree

4 files changed

+30
-20
lines changed

4 files changed

+30
-20
lines changed

src/sentry/seer/sentry_data_models.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ class TraceData(BaseModel):
3030
spans: list[Span]
3131

3232

33-
class EvidenceTraceData(BaseModel): # hate this name
33+
class EvidenceTraceData(BaseModel):
3434
trace_id: str
3535
transaction_name: str
3636

src/sentry/tasks/llm_issue_detection/detection.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
SEER_TIMEOUT_S = 120
3232
SEER_RETRIES = 1
3333
START_TIME_DELTA_MINUTES = 30
34+
TRANSACTION_BATCH_SIZE = 100
3435

3536

3637
seer_issue_detection_connection_pool = connection_from_url(
@@ -208,8 +209,8 @@ def detect_llm_issues_for_project(project_id: int) -> None:
208209
"""
209210
Process a single project for LLM issue detection.
210211
211-
Gets the project's top 50 transaction spans from the last START_TIME_DELTA_MINUTES, sorted by -sum(span.duration).
212-
From the 50 longest transactions, dedupes on normalized transaction_name.
212+
Gets the project's top TRANSACTION_BATCH_SIZE transaction spans from the last START_TIME_DELTA_MINUTES, sorted by -sum(span.duration).
213+
From those transactions, dedupes on normalized transaction_name.
213214
For each deduped transaction, gets first trace_id from the start of time window, which has small random variation.
214215
Sends these trace_ids to seer, which uses get_trace_waterfall to construct an EAPTrace to analyze.
215216
"""
@@ -224,7 +225,7 @@ def detect_llm_issues_for_project(project_id: int) -> None:
224225
return
225226

226227
evidence_traces = get_project_top_transaction_traces_for_llm_detection(
227-
project_id, limit=100, start_time_delta_minutes=START_TIME_DELTA_MINUTES
228+
project_id, limit=TRANSACTION_BATCH_SIZE, start_time_delta_minutes=START_TIME_DELTA_MINUTES
228229
)
229230
if not evidence_traces:
230231
return

src/sentry/tasks/llm_issue_detection/trace_data.py

Lines changed: 17 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -33,22 +33,27 @@ def get_project_top_transaction_traces_for_llm_detection(
3333
logger.exception("Project does not exist", extra={"project_id": project_id})
3434
return []
3535

36-
random_offset = random.randint(1, 8)
3736
end_time = datetime.now(UTC)
3837
start_time = end_time - timedelta(minutes=start_time_delta_minutes)
39-
40-
# use for both queries to ensure they are searching the same time window
41-
snuba_params = SnubaParams(
42-
start=start_time,
43-
end=end_time,
44-
projects=[project],
45-
organization=project.organization,
46-
)
4738
config = SearchResolverConfig(auto_fields=True)
4839

49-
# Step 1: Get top transactions by total time in time window
40+
def _build_snuba_params(start: datetime) -> SnubaParams:
41+
"""
42+
Both queries have different start times and the same end time.
43+
"""
44+
return SnubaParams(
45+
start=start,
46+
end=end_time,
47+
projects=[project],
48+
organization=project.organization,
49+
)
50+
51+
transaction_snuba_params = _build_snuba_params(start_time)
52+
random_offset = random.randint(1, 8)
53+
trace_snuba_params = _build_snuba_params(start_time + timedelta(minutes=random_offset))
54+
5055
transactions_result = Spans.run_table_query(
51-
params=snuba_params,
56+
params=transaction_snuba_params,
5257
query_string="is_transaction:true",
5358
selected_columns=[
5459
"transaction",
@@ -74,10 +79,9 @@ def get_project_top_transaction_traces_for_llm_detection(
7479
if normalized_name in seen_names:
7580
continue
7681

77-
# Step 2: Get ONE trace for this transaction from THE SAME time window
7882
escaped_transaction_name = UNESCAPED_QUOTE_RE.sub('\\"', transaction_name)
7983
trace_result = Spans.run_table_query(
80-
params=snuba_params,
84+
params=trace_snuba_params,
8185
query_string=f'is_transaction:true transaction:"{escaped_transaction_name}"',
8286
selected_columns=["trace", "precise.start_ts"],
8387
orderby=["precise.start_ts"], # First trace in the window

tests/sentry/tasks/test_llm_issue_detection.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,10 @@
99
detect_llm_issues_for_project,
1010
run_llm_issue_detection,
1111
)
12-
from sentry.tasks.llm_issue_detection.detection import START_TIME_DELTA_MINUTES
12+
from sentry.tasks.llm_issue_detection.detection import (
13+
START_TIME_DELTA_MINUTES,
14+
TRANSACTION_BATCH_SIZE,
15+
)
1316
from sentry.tasks.llm_issue_detection.trace_data import (
1417
get_project_top_transaction_traces_for_llm_detection,
1518
)
@@ -48,7 +51,9 @@ def test_detect_llm_issues_no_transactions(self, mock_get_transactions, mock_see
4851
detect_llm_issues_for_project(self.project.id)
4952

5053
mock_get_transactions.assert_called_once_with(
51-
self.project.id, limit=100, start_time_delta_minutes=START_TIME_DELTA_MINUTES
54+
self.project.id,
55+
limit=TRANSACTION_BATCH_SIZE,
56+
start_time_delta_minutes=START_TIME_DELTA_MINUTES,
5257
)
5358
mock_seer_request.assert_not_called()
5459

@@ -308,7 +313,7 @@ def test_returns_deduped_transaction_traces(self) -> None:
308313
self.store_spans([span1, span2, span3], is_eap=True)
309314

310315
evidence_traces = get_project_top_transaction_traces_for_llm_detection(
311-
self.project.id, limit=50, start_time_delta_minutes=30
316+
self.project.id, limit=TRANSACTION_BATCH_SIZE, start_time_delta_minutes=30
312317
)
313318

314319
assert len(evidence_traces) == 2

0 commit comments

Comments
 (0)