Skip to content
Closed

WIP #101561

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,32 @@
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

# Cleanup
register(
"cleanup.delete-old-groups.enabled",
default=False,
type=Bool,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)
register(
"cleanup.delete-old-groups.days",
default=300,
type=Int,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)
register(
"cleanup.delete-old-groups.batch-size",
default=1000,
type=Int,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)
register(
"cleanup.delete-old-groups.iterations",
default=100,
type=Int,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

# Filestore (default)
register("filestore.backend", default="filesystem", flags=FLAG_NOSTORE)
register("filestore.options", default={"location": "/tmp/sentry-files"}, flags=FLAG_NOSTORE)
Expand Down
84 changes: 84 additions & 0 deletions src/sentry/runner/commands/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from django.db.models import Model, QuerySet
from django.utils import timezone

from sentry import options
from sentry.runner.decorators import log_options
from sentry.silo.base import SiloLimit, SiloMode

Expand Down Expand Up @@ -224,6 +225,15 @@ def is_filtered(model: type[Model]) -> bool:

deletes = models_which_use_deletions_code_path()

if options.get("cleanup.delete-old-groups.enabled") and model == ("sentry.Group",):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Group Cleanup Blocked by Incorrect Model Handling

The condition model == ("sentry.Group",) prevents the new group cleanup from running. The model parameter from CLI arguments typically contains simple names like "Group" or full paths, not ("sentry.Group",). This is inconsistent with how models are usually handled (e.g., lowercase names in a list, or all models if none specified), making the new cleanup unreachable.

Fix in Cursor Fix in Web

with metrics.timer("cleanup.stage", instance=router, tags={"stage": "old_groups"}):
try:
delete_groups_older_than_days(
days=options.get("cleanup.delete-old-groups.days")
)
except Exception:
logger.exception("Error in delete_groups_older_than_days")

# Track timing for each deletion stage to monitor execution progress and identify bottlenecks
with metrics.timer(
"cleanup.stage", instance=router, tags={"stage": "specialized_cleanups"}
Expand Down Expand Up @@ -298,6 +308,80 @@ def is_filtered(model: type[Model]) -> bool:
)


def delete_groups_older_than_days(days: int) -> None:
"""
Delete old groups based on last_seen timestamp.

Since there's no index with last_seen as the leading column, we get all project_ids
(which is fast via the primary key index), then process each project using the
(project_id, last_seen) index.

Uses batching to avoid long-running transactions and table locks.
"""
from sentry import options
from sentry.models.group import Group
from sentry.models.project import Project
from sentry.utils import metrics

metrics_key = "cleanup.delete-old-groups"

cutoff_date = timezone.now() - timedelta(days=days)

# Get all project_ids (uses Project primary key index, very fast)
project_ids = list(Project.objects.values_list("id", flat=True))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

High severity vulnerability may affect your project—review required:
Line 331 lists a dependency (django) with a known High severity vulnerability.

ℹ️ Why this matters

Affected versions of Django are vulnerable to Improper Neutralization of Special Elements used in an SQL Command ('SQL Injection'). SQL injection in Django's ORM column aliases: when using QuerySet.annotate(), QuerySet.alias(), QuerySet.aggregate(), or QuerySet.extra() with dictionary expansion (**kwargs), the dictionary keys are used unescaped as SQL column aliases. On MySQL and MariaDB backends, an attacker who can influence those keys (for example, by passing a crafted dict of annotations) can inject arbitrary SQL into the generated query.

References: GHSA, CVE

To resolve this comment:
Check if you are using Django with MySQL or MariaDB.

  • If you're affected, upgrade this dependency to at least version 5.2.7 at uv.lock.
  • If you're not affected, comment /fp we don't use this [condition]
💬 Ignore this finding

To ignore this, reply with:

  • /fp <comment> for false positive
  • /ar <comment> for acceptable risk
  • /other <comment> for all other reasons

You can view more details on this finding in the Semgrep AppSec Platform here.


# Track total iterations to respect the configured limit
total_iterations = 0
max_iterations = options.get("cleanup.delete-old-groups.iterations")
batch_size = options.get("cleanup.delete-old-groups.batch-size")

# Process each project using the (project_id, last_seen) index
for project_id in project_ids:
# This query efficiently uses sentry_groupedmessage_project_id_last_seen index
# by filtering on project_id first (leading column), then last_seen
# ORDER BY last_seen DESC, id DESC matches the index for optimal performance
# CREATE INDEX sentry_groupedmessage_project_id_last_seen ON public.sentry_groupedmessage USING btree (project_id, last_seen DESC)
while True:
# Check iteration limit
if total_iterations >= max_iterations:
logger.info(
"Reached iteration limit, stopping deletion",
extra={"max_iterations": max_iterations},
)
return

with metrics.timer(f"{metrics_key}.delete_groups_for_project"):
try:
# Use order_by to ensure consistent ordering that matches index
# This helps PostgreSQL use the index efficiently
group_ids = list(
Group.objects.filter(project_id=project_id, last_seen__lt=cutoff_date)
.order_by("-last_seen", "-id")
.values_list("id", flat=True)[:batch_size]
Comment on lines +358 to +360

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

High severity vulnerability may affect your project—review required:
Line 358 lists a dependency (django) with a known High severity vulnerability.

ℹ️ Why this matters

Affected versions of Django are vulnerable to Improper Neutralization of Special Elements used in an SQL Command ('SQL Injection'). SQL injection in Django's ORM column aliases: when using QuerySet.annotate(), QuerySet.alias(), QuerySet.aggregate(), or QuerySet.extra() with dictionary expansion (**kwargs), the dictionary keys are used unescaped as SQL column aliases. On MySQL and MariaDB backends, an attacker who can influence those keys (for example, by passing a crafted dict of annotations) can inject arbitrary SQL into the generated query.

References: GHSA, CVE

To resolve this comment:
Check if you are using Django with MySQL or MariaDB.

  • If you're affected, upgrade this dependency to at least version 5.2.7 at uv.lock.
  • If you're not affected, comment /fp we don't use this [condition]
💬 Ignore this finding

To ignore this, reply with:

  • /fp <comment> for false positive
  • /ar <comment> for acceptable risk
  • /other <comment> for all other reasons

You can view more details on this finding in the Semgrep AppSec Platform here.

)

if not group_ids:
# No more groups to delete for this project
break

deleted_count, _ = Group.objects.filter(id__in=group_ids).delete()
metrics.incr(f"{metrics_key}.deleted_count", deleted_count)
total_iterations += 1

# If we deleted fewer groups than batch size, we're done with this project
if len(group_ids) < batch_size:
break

except Exception:
logger.exception("Error in delete_groups_batch")
break

# Check killswitch after each batch
if not options.get("cleanup.delete-old-groups.enabled"):
logger.info("Killswitch disabled, stopping deletion")
return


def _validate_and_setup_environment(concurrency: int, silent: bool) -> None:
"""Validate input parameters and set up environment variables."""
if concurrency < 1:
Expand Down
Loading
Loading