Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 37 additions & 36 deletions src/sentry/tasks/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1620,17 +1620,10 @@ def check_if_flags_sent(job: PostProcessJob) -> None:
set_project_flag_and_signal(project, "has_flags", first_flag_received)


def kick_off_seer_automation(job: PostProcessJob) -> None:
from sentry.seer.autofix.issue_summary import get_issue_summary_lock_key
def seer_automation_permission_and_type_check(group: Group) -> bool:
from sentry import quotas
from sentry.constants import DataCategory
from sentry.seer.seer_setup import get_seer_org_acknowledgement
from sentry.tasks.autofix import start_seer_automation

event = job["event"]
group = event.group

# Only run on issues with no existing scan - TODO: Update condition for triage signals V0
if group.seer_fixability_score is not None:
return

# check currently supported issue categories for Seer
if group.issue_category not in [
Expand All @@ -1644,56 +1637,64 @@ def kick_off_seer_automation(job: PostProcessJob) -> None:
GroupCategory.REPLAY,
GroupCategory.FEEDBACK,
]:
return
return False

if not features.has("organizations:gen-ai-features", group.organization):
return
return False

gen_ai_allowed = not group.organization.get_option("sentry:hide_ai_features")
if not gen_ai_allowed:
return
return False

project = group.project
if (
not project.get_option("sentry:seer_scanner_automation")
and not group.issue_type.always_trigger_seer_automation
):
return

# Check if automation has already been queued or completed for this group
# seer_autofix_last_triggered is set when trigger_autofix is successfully started.
# Use cache with a short TTL to hold the lock for a short time, since it takes a few minutes to set seer_autofix_last_triggered
cache_key = f"seer_automation_queued:{group.id}"
if cache.get(cache_key) or group.seer_autofix_last_triggered is not None:
return

# Don't run if there's already a task in progress for this issue
lock_key, lock_name = get_issue_summary_lock_key(group.id)
lock = locks.get(lock_key, duration=1, name=lock_name)
if lock.locked():
return
return False

seer_enabled = get_seer_org_acknowledgement(group.organization)
if not seer_enabled:
return

from sentry import quotas
from sentry.constants import DataCategory
return False

has_budget: bool = quotas.backend.has_available_reserved_budget(
org_id=group.organization.id, data_category=DataCategory.SEER_SCANNER
)
if not has_budget:
return
return False

return True


def seer_automation_rate_limit_check(group: Group) -> bool:
from sentry.seer.autofix.utils import is_seer_scanner_rate_limited

if is_seer_scanner_rate_limited(project, group.organization):
if is_seer_scanner_rate_limited(group.project, group.organization):
return False
return True


def kick_off_seer_automation(job: PostProcessJob) -> None:
from sentry.seer.autofix.issue_summary import get_issue_summary_lock_key
from sentry.tasks.autofix import start_seer_automation

event = job["event"]
group = event.group

# Only run on issues with no existing scan - TODO: Update condition for triage signals V0
if group.seer_fixability_score is not None:
return

if seer_automation_permission_and_type_check(group) is False:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[BestPractice]

The comparison is False is used here and on line 1697. While technically correct, it's more Pythonic to use not seer_automation_permission_and_type_check(group) and not seer_automation_rate_limit_check(group), since these functions return boolean values and there is no None/False ambiguity to distinguish.

Suggestion:

if not seer_automation_permission_and_type_check(group):
    return
Context for Agents
[**BestPractice**]

The comparison `is False` is used here and on line 1697. While technically correct, it's more Pythonic to use `not seer_automation_permission_and_type_check(group)` and `not seer_automation_rate_limit_check(group)`, since these functions return boolean values and there is no None/False ambiguity to distinguish.

Suggestion:
```python
if not seer_automation_permission_and_type_check(group):
    return
```

File: src/sentry/tasks/post_process.py
Line: 1688

return

# Don't run if there's already a task in progress for this issue
lock_key, lock_name = get_issue_summary_lock_key(group.id)
lock = locks.get(lock_key, duration=1, name=lock_name)
if lock.locked():
return

# cache.add uses Redis SETNX which atomically sets the key only if it doesn't exist
# Returns False if another process already set the key, ensuring only one process proceeds
if not cache.add(cache_key, True, timeout=600):  # 10 minutes
if seer_automation_rate_limit_check(group) is False:
return

start_seer_automation.delay(group.id)
Comment on lines +1677 to 1700

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[CriticalError]

Critical: The removal of the cache-based deduplication mechanism (lines checking cache_key, cache.get(cache_key), group.seer_autofix_last_triggered, and cache.add(cache_key, True, timeout=600)) introduces a potential race condition.

The old code used cache.add() which is atomic (Redis SETNX) to ensure only one process proceeds. Without this, if multiple events for the same group are processed concurrently, start_seer_automation.delay(group.id) could be called multiple times for the same group.

The lock check on line 1693-1695 only prevents concurrent execution of this function, but doesn't prevent the same group from being queued multiple times if this function completes and is called again before the async task starts.

Consider: Was this deduplication moved elsewhere, or is the seer_fixability_score check on line 1685 intended to be sufficient? If so, document why multiple task queuing is now acceptable.

Context for Agents
[**CriticalError**]

Critical: The removal of the cache-based deduplication mechanism (lines checking `cache_key`, `cache.get(cache_key)`, `group.seer_autofix_last_triggered`, and `cache.add(cache_key, True, timeout=600)`) introduces a potential race condition.

The old code used `cache.add()` which is atomic (Redis SETNX) to ensure only one process proceeds. Without this, if multiple events for the same group are processed concurrently, `start_seer_automation.delay(group.id)` could be called multiple times for the same group.

The lock check on line 1693-1695 only prevents concurrent execution *of this function*, but doesn't prevent the same group from being queued multiple times if this function completes and is called again before the async task starts.

Consider: Was this deduplication moved elsewhere, or is the `seer_fixability_score` check on line 1685 intended to be sufficient? If so, document why multiple task queuing is now acceptable.

File: src/sentry/tasks/post_process.py
Line: 1700

Expand Down
177 changes: 64 additions & 113 deletions tests/sentry/tasks/test_post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from hashlib import md5
from typing import Any
from unittest import mock
from unittest.mock import MagicMock, Mock, patch
from unittest.mock import MagicMock, Mock, PropertyMock, patch

import pytest
from django.db import router
Expand Down Expand Up @@ -3053,136 +3053,86 @@ def test_kick_off_seer_automation_with_hide_ai_features_enabled(

mock_start_seer_automation.assert_not_called()

@patch(
"sentry.seer.seer_setup.get_seer_org_acknowledgement",
return_value=True,
)
@patch("sentry.tasks.autofix.start_seer_automation.delay")
@with_feature("organizations:gen-ai-features")
def test_kick_off_seer_automation_skips_when_seer_autofix_last_triggered_set(
self, mock_start_seer_automation, mock_get_seer_org_acknowledgement
):
"""Test that automation is skipped when group.seer_autofix_last_triggered is already set"""
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(
data={"message": "testing"},
project_id=self.project.id,
)

# Set seer_autofix_last_triggered on the group to simulate autofix already triggered
group = event.group
group.seer_autofix_last_triggered = timezone.now()
group.save()

self.call_post_process_group(
is_new=True,
is_regression=False,
is_new_group_environment=True,
event=event,
)

mock_start_seer_automation.assert_not_called()
class SeerAutomationHelperFunctionsTestMixin(BasePostProgressGroupMixin):
"""Unit tests for seer_automation_permission_and_type_check and seer_automation_rate_limit_check."""

@patch(
"sentry.seer.seer_setup.get_seer_org_acknowledgement",
return_value=True,
)
@patch("sentry.tasks.autofix.start_seer_automation.delay")
@with_feature("organizations:gen-ai-features")
def test_kick_off_seer_automation_skips_when_cache_key_exists(
self, mock_start_seer_automation, mock_get_seer_org_acknowledgement
@patch("sentry.quotas.backend.has_available_reserved_budget", return_value=True)
@patch("sentry.seer.seer_setup.get_seer_org_acknowledgement", return_value=True)
@patch("sentry.features.has", return_value=True)
def test_seer_automation_permission_and_type_check(
self, mock_features_has, mock_get_seer_org_acknowledgement, mock_has_budget
):
"""Test that automation is skipped when cache key indicates it's already queued"""
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(
data={"message": "testing"},
project_id=self.project.id,
)
"""Test permission check with various failure conditions."""
from sentry.constants import DataCategory
from sentry.issues.grouptype import GroupCategory
from sentry.tasks.post_process import seer_automation_permission_and_type_check

# Set cache key to simulate automation already queued
cache_key = f"seer_automation_queued:{event.group.id}"
cache.set(cache_key, True, timeout=600)

self.call_post_process_group(
is_new=True,
is_regression=False,
is_new_group_environment=True,
event=event,
)
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(data={"message": "testing"}, project_id=self.project.id)
group = event.group

mock_start_seer_automation.assert_not_called()
# All conditions pass
assert seer_automation_permission_and_type_check(group) is True

# Cleanup
cache.delete(cache_key)
# Unsupported categories (using PropertyMock to mock the property)
with patch(
"sentry.models.group.Group.issue_category", new_callable=PropertyMock
) as mock_category:
mock_category.return_value = GroupCategory.REPLAY
assert seer_automation_permission_and_type_check(group) is False

@patch(
"sentry.seer.seer_setup.get_seer_org_acknowledgement",
return_value=True,
)
@patch("sentry.tasks.autofix.start_seer_automation.delay")
@with_feature("organizations:gen-ai-features")
def test_kick_off_seer_automation_uses_atomic_cache_add(
self, mock_start_seer_automation, mock_get_seer_org_acknowledgement
):
"""Test that cache.add atomic operation prevents race conditions"""
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(
data={"message": "testing"},
project_id=self.project.id,
)
mock_category.return_value = GroupCategory.FEEDBACK
assert seer_automation_permission_and_type_check(group) is False

cache_key = f"seer_automation_queued:{event.group.id}"
# Missing feature flag
mock_features_has.return_value = False
assert seer_automation_permission_and_type_check(group) is False

with patch("sentry.tasks.post_process.cache") as mock_cache:
# Simulate cache.get returning None (not in cache)
# but cache.add returning False (another process set it)
mock_cache.get.return_value = None
mock_cache.add.return_value = False
# Hide AI features enabled
mock_features_has.return_value = True
self.organization.update_option("sentry:hide_ai_features", True)
assert seer_automation_permission_and_type_check(group) is False
self.organization.update_option("sentry:hide_ai_features", False)

self.call_post_process_group(
is_new=True,
is_regression=False,
is_new_group_environment=True,
event=event,
)
# Scanner disabled without always_trigger
self.project.update_option("sentry:seer_scanner_automation", False)
with patch.object(group.issue_type, "always_trigger_seer_automation", False):
assert seer_automation_permission_and_type_check(group) is False

# Should check cache but not call automation due to cache.add returning False
mock_cache.get.assert_called()
mock_cache.add.assert_called_once_with(cache_key, True, timeout=600)
mock_start_seer_automation.assert_not_called()
# Scanner disabled but always_trigger enabled
with patch.object(group.issue_type, "always_trigger_seer_automation", True):
assert seer_automation_permission_and_type_check(group) is True

@patch(
"sentry.seer.seer_setup.get_seer_org_acknowledgement",
return_value=True,
)
@patch("sentry.tasks.autofix.start_seer_automation.delay")
@with_feature("organizations:gen-ai-features")
def test_kick_off_seer_automation_proceeds_when_cache_add_succeeds(
self, mock_start_seer_automation, mock_get_seer_org_acknowledgement
):
"""Test that automation proceeds when cache.add succeeds (no race condition)"""
# Seer not acknowledged
self.project.update_option("sentry:seer_scanner_automation", True)
event = self.create_event(
data={"message": "testing"},
project_id=self.project.id,
mock_get_seer_org_acknowledgement.return_value = False
assert seer_automation_permission_and_type_check(group) is False

# No budget
mock_get_seer_org_acknowledgement.return_value = True
mock_has_budget.return_value = False
assert seer_automation_permission_and_type_check(group) is False
mock_has_budget.assert_called_with(
org_id=group.organization.id, data_category=DataCategory.SEER_SCANNER
)

# Ensure seer_autofix_last_triggered is not set
assert event.group.seer_autofix_last_triggered is None
@patch("sentry.seer.autofix.utils.is_seer_scanner_rate_limited")
def test_seer_automation_rate_limit_check(self, mock_is_rate_limited):
"""Test rate limit check returns correct value based on rate limiting status."""
from sentry.tasks.post_process import seer_automation_rate_limit_check

self.call_post_process_group(
is_new=True,
is_regression=False,
is_new_group_environment=True,
event=event,
)
event = self.create_event(data={"message": "testing"}, project_id=self.project.id)
group = event.group

# Should successfully queue automation
mock_start_seer_automation.assert_called_once_with(event.group.id)
mock_is_rate_limited.return_value = False
assert seer_automation_rate_limit_check(group) is True

mock_is_rate_limited.return_value = True
assert seer_automation_rate_limit_check(group) is False

# Cleanup
cache_key = f"seer_automation_queued:{event.group.id}"
cache.delete(cache_key)
assert mock_is_rate_limited.call_count == 2
mock_is_rate_limited.assert_called_with(group.project, group.organization)


class PostProcessGroupErrorTest(
Expand All @@ -3194,6 +3144,7 @@ class PostProcessGroupErrorTest(
InboxTestMixin,
ResourceChangeBoundsTestMixin,
KickOffSeerAutomationTestMixin,
SeerAutomationHelperFunctionsTestMixin,
RuleProcessorTestMixin,
ServiceHooksTestMixin,
SnoozeTestMixin,
Expand Down