Skip to content

Commit 823d8b9

Browse files
committed
WIP
1 parent 7b4f672 commit 823d8b9

File tree

3 files changed

+373
-0
lines changed

3 files changed

+373
-0
lines changed

src/sentry/options/defaults.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,32 @@
342342
flags=FLAG_AUTOMATOR_MODIFIABLE,
343343
)
344344

345+
# Cleanup
# Gates the old-group deletion stage of `sentry cleanup`; it is also re-read
# while that stage runs so it can be used to stop an in-flight deletion.
register(
    "cleanup.delete-old-groups.enabled",
    type=Bool,
    default=False,
    flags=FLAG_AUTOMATOR_MODIFIABLE,
)
# Age threshold: groups whose last_seen is more than this many days in the
# past are selected for deletion.
register(
    "cleanup.delete-old-groups.days",
    type=Int,
    default=300,
    flags=FLAG_AUTOMATOR_MODIFIABLE,
)
# Maximum number of group ids selected and deleted in a single batch.
register(
    "cleanup.delete-old-groups.batch-size",
    type=Int,
    default=1000,
    flags=FLAG_AUTOMATOR_MODIFIABLE,
)
# Maximum number of delete batches per run, counted across all projects.
register(
    "cleanup.delete-old-groups.iterations",
    type=Int,
    default=100,
    flags=FLAG_AUTOMATOR_MODIFIABLE,
)
370+
345371
# Filestore (default)
346372
register("filestore.backend", default="filesystem", flags=FLAG_NOSTORE)
347373
register("filestore.options", default={"location": "/tmp/sentry-files"}, flags=FLAG_NOSTORE)

src/sentry/runner/commands/cleanup.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from django.db.models import Model, QuerySet
1818
from django.utils import timezone
1919

20+
from sentry import options
2021
from sentry.runner.decorators import log_options
2122
from sentry.silo.base import SiloLimit, SiloMode
2223

@@ -224,6 +225,15 @@ def is_filtered(model: type[Model]) -> bool:
224225

225226
deletes = models_which_use_deletions_code_path()
226227

228+
if options.get("cleanup.delete-old-groups.enabled") and model == ("sentry.Group",):
229+
with metrics.timer("cleanup.stage", instance=router, tags={"stage": "old_groups"}):
230+
try:
231+
delete_groups_older_than_days(
232+
days=options.get("cleanup.delete-old-groups.days")
233+
)
234+
except Exception:
235+
logger.exception("Error in delete_groups_older_than_days")
236+
227237
# Track timing for each deletion stage to monitor execution progress and identify bottlenecks
228238
with metrics.timer(
229239
"cleanup.stage", instance=router, tags={"stage": "specialized_cleanups"}
@@ -298,6 +308,80 @@ def is_filtered(model: type[Model]) -> bool:
298308
)
299309

300310

311+
def delete_groups_older_than_days(days: int) -> None:
    """
    Delete groups whose ``last_seen`` is more than ``days`` days in the past.

    There is no index with ``last_seen`` as the leading column, so rather than
    filtering the whole table we list every project id and query each project
    separately, which lets the database use the ``(project_id, last_seen)``
    index.

    Deletion happens in batches of ``cleanup.delete-old-groups.batch-size`` ids
    to avoid long-running transactions and table locks. The run stops once
    ``cleanup.delete-old-groups.iterations`` batches have been deleted in total
    (counted across all projects, not per project).

    ``cleanup.delete-old-groups.enabled`` is re-read before every batch, so
    switching it off stops a run that is already in progress, including when
    the loop is moving from one project to the next.

    An exception raised while selecting or deleting a batch is logged and the
    rest of that project is skipped; it is not propagated to the caller.

    :param days: Age threshold in days. Groups with ``last_seen`` earlier than
        ``now - days`` are deleted.
    """
    from sentry import options
    from sentry.models.group import Group
    from sentry.models.project import Project
    from sentry.utils import metrics

    metrics_key = "cleanup.delete-old-groups"
    enabled_option = "cleanup.delete-old-groups.enabled"

    max_iterations = options.get("cleanup.delete-old-groups.iterations")
    batch_size = options.get("cleanup.delete-old-groups.batch-size")

    # A batch size below 1 can never delete anything: 0 yields an empty slice
    # for every project and a negative value makes the slice itself raise.
    # Bail out once instead of paying one useless query (or one logged
    # exception) per project.
    if batch_size < 1:
        logger.warning(
            "Invalid batch size, skipping deletion",
            extra={"batch_size": batch_size},
        )
        return

    cutoff_date = timezone.now() - timedelta(days=days)

    # Materialise the ids up front so no cursor on the project table stays
    # open while the per-project deletes run.
    project_ids = list(Project.objects.values_list("id", flat=True))

    # Number of batches deleted so far, across all projects.
    total_iterations = 0

    for project_id in project_ids:
        while True:
            if total_iterations >= max_iterations:
                logger.info(
                    "Reached iteration limit, stopping deletion",
                    extra={"max_iterations": max_iterations},
                )
                return

            # Checked before every batch (not only after a full one) so that
            # the switch is also honoured when the previous project ended via
            # `break`.
            if not options.get(enabled_option):
                logger.info("Killswitch disabled, stopping deletion")
                return

            with metrics.timer(f"{metrics_key}.delete_groups_for_project"):
                try:
                    # Filter on project_id first (leading index column), then
                    # last_seen; the ordering mirrors the index:
                    # CREATE INDEX sentry_groupedmessage_project_id_last_seen
                    #   ON public.sentry_groupedmessage
                    #   USING btree (project_id, last_seen DESC)
                    group_ids = list(
                        Group.objects.filter(project_id=project_id, last_seen__lt=cutoff_date)
                        .order_by("-last_seen", "-id")
                        .values_list("id", flat=True)[:batch_size]
                    )

                    if not group_ids:
                        # Nothing (left) to delete for this project.
                        break

                    # QuerySet.delete() returns the total number of rows
                    # removed, which includes cascaded related objects.
                    deleted_count, _ = Group.objects.filter(id__in=group_ids).delete()
                    metrics.incr(f"{metrics_key}.deleted_count", deleted_count)
                    total_iterations += 1

                    # A short batch means this project has no more old groups.
                    if len(group_ids) < batch_size:
                        break

                except Exception:
                    # Best effort: give up on this project and move on.
                    logger.exception("Error in delete_groups_batch")
                    break
383+
384+
301385
def _validate_and_setup_environment(concurrency: int, silent: bool) -> None:
302386
"""Validate input parameters and set up environment variables."""
303387
if concurrency < 1:

0 commit comments

Comments
 (0)