|
3 | 3 | import os |
4 | 4 |
|
5 | 5 | from celery import Celery |
| 6 | +from celery.schedules import crontab |
6 | 7 | from dotenv import load_dotenv |
7 | 8 |
|
8 | 9 | # Load environment variables |
|
12 | 13 | CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0") |
13 | 14 | CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0") |
14 | 15 |
|
| 16 | +# Get schedule checker interval from environment |
| 17 | +# Format: "<number><unit>" where unit is 'm' (minutes) or 'h' (hours) |
| 18 | +# Examples: "1m" (every minute), "5m" (every 5 minutes), "1h" (every hour) |
| 19 | +SCHEDULE_CHECKER_INTERVAL = os.getenv("SCHEDULE_CHECKER_INTERVAL", "2m") |
| 20 | + |
| 21 | + |
def parse_schedule_interval(interval: str) -> dict:
    """Parse an interval string into crontab keyword arguments.

    The expected format is ``<number><unit>``; matching is case-insensitive
    and surrounding whitespace is ignored. Minute units are ``m``, ``min``,
    ``mins``, ``minute`` and ``minutes``; hour units are ``h``, ``hr``,
    ``hrs``, ``hour`` and ``hours``.

    A cron field can only express a step inside its own range, so:

    * 1-59 minutes map onto the ``minute`` field,
    * minute counts that are whole hours (60, 120, ...) are treated as hours,
    * 1-23 hours map onto the ``hour`` field (firing at minute 0),
    * 24 hours means once a day at 00:00.

    Note that a step which does not divide the field evenly (e.g. ``7m``)
    restarts at the top of every hour/day, as with any cron expression.

    Args:
        interval: String like "1m", "5m", "1h", etc.

    Returns:
        Dict with crontab parameters (``minute`` and ``hour``). If the value
        cannot be parsed, is zero, or cannot be expressed as a cron step, a
        warning is logged and the every-minute schedule is returned instead
        of raising, so a bad environment value cannot break module import.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    text = interval.strip().lower()

    # Longest suffix first, so "5min" is matched as "min" and not as "m"
    # preceded by the non-numeric text "5mi".
    unit_suffixes = (
        ("minutes", "m"),
        ("minute", "m"),
        ("mins", "m"),
        ("min", "m"),
        ("m", "m"),
        ("hours", "h"),
        ("hour", "h"),
        ("hrs", "h"),
        ("hr", "h"),
        ("h", "h"),
    )

    unit = None
    amount_text = ""
    for suffix, kind in unit_suffixes:
        if text.endswith(suffix):
            unit = kind
            amount_text = text[: -len(suffix)].strip()
            break

    params = None
    # isdecimal() rejects "", signs and fractions, so int() below cannot raise.
    if unit is not None and amount_text.isdecimal():
        amount = int(amount_text)

        # "120m" cannot be a minute step, but it is exactly "2h".
        if unit == "m" and amount >= 60 and amount % 60 == 0:
            unit, amount = "h", amount // 60

        if unit == "m" and 1 <= amount < 60:
            params = {"minute": "*" if amount == 1 else f"*/{amount}", "hour": "*"}
        elif unit == "h" and 1 <= amount < 24:
            params = {"minute": "0", "hour": "*" if amount == 1 else f"*/{amount}"}
        elif unit == "h" and amount == 24:
            params = {"minute": "0", "hour": "0"}

    if params is None:
        logging.getLogger(__name__).warning(
            "Invalid schedule interval %r; falling back to every minute",
            interval,
        )
        # Default to every minute if parsing fails
        return {"minute": "*", "hour": "*"}

    return params
| 51 | + |
| 52 | + |
| 53 | +# Parse the schedule interval |
| 54 | +schedule_params = parse_schedule_interval(SCHEDULE_CHECKER_INTERVAL) |
| 55 | + |
15 | 56 | # Create Celery app |
16 | 57 | celery_app = Celery( |
17 | 58 | "surfsense", |
|
21 | 62 | "app.tasks.celery_tasks.document_tasks", |
22 | 63 | "app.tasks.celery_tasks.podcast_tasks", |
23 | 64 | "app.tasks.celery_tasks.connector_tasks", |
| 65 | + "app.tasks.celery_tasks.schedule_checker_task", |
24 | 66 | ], |
25 | 67 | ) |
26 | 68 |
|
|
47 | 89 | task_reject_on_worker_lost=True, |
48 | 90 | # Broker settings |
49 | 91 | broker_connection_retry_on_startup=True, |
| 92 | + # Beat scheduler settings |
| 93 | + beat_max_loop_interval=60, # Check every minute |
50 | 94 | ) |
51 | 95 |
|
# Celery Beat schedule.
# Meta-scheduler pattern: rather than registering one Beat entry per connector,
# a single entry fires at the configured interval and checks the database for
# connectors that need indexing, so schedules can change without a restart.
celery_app.conf.beat_schedule = {
    "check-periodic-connector-schedules": {
        "task": "check_periodic_schedules",
        "schedule": crontab(**schedule_params),
        # Discard the task if it has not been picked up within 30 seconds.
        "options": {"expires": 30},
    },
}
0 commit comments