|
17 | 17 | from django.db.models import Model, QuerySet |
18 | 18 | from django.utils import timezone |
19 | 19 |
|
| 20 | +from sentry import options |
20 | 21 | from sentry.runner.decorators import log_options |
21 | 22 | from sentry.silo.base import SiloLimit, SiloMode |
22 | 23 |
|
@@ -224,6 +225,15 @@ def is_filtered(model: type[Model]) -> bool: |
224 | 225 |
|
225 | 226 | deletes = models_which_use_deletions_code_path() |
226 | 227 |
|
| 228 | + if options.get("cleanup.delete-old-groups.enabled") and model == ("sentry.Group",): |
| 229 | + with metrics.timer("cleanup.stage", instance=router, tags={"stage": "old_groups"}): |
| 230 | + try: |
| 231 | + delete_groups_older_than_days( |
| 232 | + days=options.get("cleanup.delete-old-groups.days") |
| 233 | + ) |
| 234 | + except Exception: |
| 235 | + logger.exception("Error in delete_groups_older_than_days") |
| 236 | + |
227 | 237 | # Track timing for each deletion stage to monitor execution progress and identify bottlenecks |
228 | 238 | with metrics.timer( |
229 | 239 | "cleanup.stage", instance=router, tags={"stage": "specialized_cleanups"} |
@@ -298,6 +308,80 @@ def is_filtered(model: type[Model]) -> bool: |
298 | 308 | ) |
299 | 309 |
|
300 | 310 |
|
def delete_groups_older_than_days(days: int) -> None:
    """
    Delete groups whose ``last_seen`` is more than ``days`` days in the past.

    All project ids are loaded up front, then each project is drained in
    batches of ``cleanup.delete-old-groups.batch-size`` group ids. Filtering by
    ``project_id`` first is intended to let the database use the
    ``(project_id, last_seen)`` index rather than scanning by ``last_seen`` alone.

    The run stops early when either:

    * ``cleanup.delete-old-groups.iterations`` successful batches have been
      deleted (counted across all projects), or
    * the ``cleanup.delete-old-groups.enabled`` option is turned off.

    Both conditions are evaluated before every batch, so they are honoured even
    when each project only yields a single short batch.

    Errors raised while selecting or deleting a batch are logged and the
    remaining work for that project is skipped; processing continues with the
    next project.

    :param days: Age threshold in days; groups last seen before
        ``now - days`` are deleted.
    """
    from sentry import options
    from sentry.models.group import Group
    from sentry.models.project import Project
    from sentry.utils import metrics

    metrics_key = "cleanup.delete-old-groups"

    cutoff_date = timezone.now() - timedelta(days=days)

    # Materialize every project id so the per-project loop below does not keep
    # a queryset open while deletions run.
    project_ids = list(Project.objects.values_list("id", flat=True))

    # Successful batches so far, across all projects.
    total_iterations = 0
    max_iterations = options.get("cleanup.delete-old-groups.iterations")
    batch_size = options.get("cleanup.delete-old-groups.batch-size")

    for project_id in project_ids:
        # The per-project query filters on project_id and last_seen and orders
        # by last_seen DESC (id DESC as tie-breaker), which is meant to line up
        # with:
        # CREATE INDEX sentry_groupedmessage_project_id_last_seen ON public.sentry_groupedmessage USING btree (project_id, last_seen DESC)
        while True:
            # Re-read the killswitch before every batch. Checking it here
            # (rather than after the batch) guarantees it is consulted even
            # when the previous batch ended the project via ``break``.
            if not options.get("cleanup.delete-old-groups.enabled"):
                logger.info("Killswitch disabled, stopping deletion")
                return

            if total_iterations >= max_iterations:
                logger.info(
                    "Reached iteration limit, stopping deletion",
                    extra={"max_iterations": max_iterations},
                )
                return

            with metrics.timer(f"{metrics_key}.delete_groups_for_project"):
                try:
                    group_ids = list(
                        Group.objects.filter(project_id=project_id, last_seen__lt=cutoff_date)
                        .order_by("-last_seen", "-id")
                        .values_list("id", flat=True)[:batch_size]
                    )

                    if not group_ids:
                        # Nothing (left) to delete for this project.
                        break

                    # ``delete()`` returns the total number of rows removed,
                    # which may include cascaded related objects, so it is
                    # used for the metric only and not for batch accounting.
                    deleted_count, _ = Group.objects.filter(id__in=group_ids).delete()
                    metrics.incr(f"{metrics_key}.deleted_count", deleted_count)
                    total_iterations += 1

                    # A short batch means the query found fewer matching groups
                    # than the batch size, so this project is exhausted.
                    if len(group_ids) < batch_size:
                        break

                except Exception:
                    # Best effort: log and move on to the next project instead
                    # of aborting the whole cleanup run.
                    logger.exception("Error in delete_groups_batch")
                    break
| 383 | + |
| 384 | + |
301 | 385 | def _validate_and_setup_environment(concurrency: int, silent: bool) -> None: |
302 | 386 | """Validate input parameters and set up environment variables.""" |
303 | 387 | if concurrency < 1: |
|
0 commit comments