@@ -169,6 +169,24 @@ def format_local_time_filter(utc_dt, format_str='%Y-%m-%d %H:%M'):
 def schedule_all_repositories():
     """Schedule all active repositories on startup"""
     try:
+        # Clean up any stuck 'running' jobs from previous sessions
+        stuck_jobs = BackupJob.query.filter_by(status='running').all()
+        if stuck_jobs:
+            logger.warning(f"Found {len(stuck_jobs)} stuck 'running' jobs from previous session")
+            for stuck_job in stuck_jobs:
+                stuck_job.status = 'failed'
+                stuck_job.error_message = 'Job was running when application restarted'
+                stuck_job.completed_at = datetime.utcnow()
+                logger.info(f"Marked stuck job as failed: {stuck_job.id} for repository {stuck_job.repository_id}")
+            db.session.commit()
+
+        # First, clear any existing jobs to prevent duplicates
+        existing_jobs = scheduler.get_jobs()
+        for job in existing_jobs:
+            if job.id.startswith('backup_'):
+                scheduler.remove_job(job.id)
+                logger.info(f"Removed existing job on startup: {job.id}")
+
         repositories = Repository.query.filter_by(is_active=True).all()
         scheduled_count = 0
         for repository in repositories:
@@ -180,15 +198,24 @@ def schedule_all_repositories():
     except Exception as e:
         logger.error(f"Error scheduling repositories on startup: {e}")
 
-# Flag to ensure we only initialize once
+# Thread-safe flag to ensure we only initialize once
+import threading
+_scheduler_lock = threading.Lock()
 _scheduler_initialized = False
 
 def ensure_scheduler_initialized():
-    """Ensure scheduler is initialized with existing repositories"""
+    """Ensure scheduler is initialized with existing repositories (thread-safe)"""
     global _scheduler_initialized
-    if not _scheduler_initialized:
-        schedule_all_repositories()
-        _scheduler_initialized = True
+    if _scheduler_initialized:
+        return
+
+    with _scheduler_lock:
+        # Double-check pattern to avoid race conditions
+        if not _scheduler_initialized:
+            logger.info("Initializing scheduler with existing repositories...")
+            schedule_all_repositories()
+            _scheduler_initialized = True
+            logger.info("Scheduler initialization completed")
 
 @login_manager.user_loader
 def load_user(user_id):
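
For reference, ensure_scheduler_initialized() above is an instance of the double-checked locking idiom: an unlocked fast path, then a second check under the lock so that only one thread performs the initialization. A minimal standalone sketch of the same pattern (the init_once() and do_initialization() names are illustrative, not part of this patch):

import threading

_init_lock = threading.Lock()
_initialized = False

def do_initialization():
    # Stand-in for the real setup work (e.g. schedule_all_repositories())
    print("initialized")

def init_once():
    global _initialized
    # Fast path: skip the lock entirely once initialization has completed
    if _initialized:
        return
    with _init_lock:
        # Re-check under the lock; another thread may have initialized meanwhile
        if not _initialized:
            do_initialization()
            _initialized = True
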
@@ -197,7 +224,6 @@ def load_user(user_id):
 @app.route('/')
 @login_required
 def dashboard():
-    ensure_scheduler_initialized()
     repositories = Repository.query.filter_by(user_id=current_user.id).all()
     recent_jobs = BackupJob.query.filter_by(user_id=current_user.id).order_by(BackupJob.created_at.desc()).limit(10).all()
     return render_template('dashboard.html', repositories=repositories, recent_jobs=recent_jobs)
@@ -456,14 +482,23 @@ def edit_repository(repo_id):
 
         db.session.commit()
 
-        # Reschedule the backup job
+        # Reschedule the backup job - more robust approach
+        job_id = f'backup_{repo_id}'
         try:
-            scheduler.remove_job(f'backup_{repo_id}', jobstore=None)
-        except:
-            pass
+            # Remove job if it exists
+            if scheduler.get_job(job_id):
+                scheduler.remove_job(job_id)
+                logger.info(f"Removed existing job during edit: {job_id}")
+        except Exception as e:
+            logger.warning(f"Could not remove job during edit {job_id}: {e}")
 
-        if repository.is_active:
+        # Wait a moment to ensure job removal is complete
+        import time
+        time.sleep(0.1)
+
+        if repository.is_active and repository.schedule_type != 'manual':
             schedule_backup_job(repository)
+            logger.info(f"Rescheduled job for repository: {repository.name}")
 
         flash('Repository updated successfully', 'success')
         return redirect(url_for('repositories'))
@@ -590,11 +625,18 @@ def schedule_backup_job(repository):
 
     job_id = f'backup_{repository.id}'
 
-    # Remove existing job if it exists
+    # Remove existing job if it exists - try multiple ways to ensure it's gone
     try:
-        scheduler.remove_job(job_id)
-    except:
-        pass
+        if scheduler.get_job(job_id):
+            scheduler.remove_job(job_id)
+            logger.info(f"Removed existing scheduled job: {job_id}")
+    except Exception as e:
+        logger.warning(f"Could not remove existing job {job_id}: {e}")
+
+    # Double-check that job is really gone
+    if scheduler.get_job(job_id):
+        logger.error(f"Job {job_id} still exists after removal attempt")
+        return
 
     # Create a wrapper function that includes Flask app context
     def backup_with_context():
@@ -613,12 +655,26 @@ def backup_with_context():
                 ).first()
 
                 if running_job:
-                    logger.warning(f"Backup already running for repository {repo.name}, skipping")
+                    logger.warning(f"Backup already running for repository {repo.name} (job {running_job.id}), skipping")
                     return
 
+                # Additional check: ensure no backup started in the last 30 seconds to prevent rapid duplicates
+                recent_cutoff = datetime.utcnow() - timedelta(seconds=30)
+                recent_backup = BackupJob.query.filter_by(
+                    repository_id=repository.id
+                ).filter(
+                    BackupJob.started_at > recent_cutoff
+                ).first()
+
+                if recent_backup:
+                    logger.warning(f"Recent backup found for repository {repo.name} (started at {recent_backup.started_at}), skipping")
+                    return
+
+                logger.info(f"Starting scheduled backup for repository: {repo.name}")
                 backup_service.backup_repository(repo)
+
             except Exception as e:
-                logger.error(f"Error in scheduled backup for repository {repository.id}: {e}")
+                logger.error(f"Error in scheduled backup for repository {repository.id}: {e}", exc_info=True)
 
     # Create new schedule based on schedule_type
     if repository.schedule_type == 'hourly':
@@ -693,11 +749,29 @@ def backup_with_context():
         id=job_id,
         name=f'Backup {repository.name}',
         replace_existing=True,
-        misfire_grace_time=300,  # 5 minutes grace time
-        coalesce=True  # Combine multiple pending executions
+        misfire_grace_time=60,  # Reduced from 5 minutes to 1 minute
+        coalesce=True,  # Combine multiple pending executions
+        max_instances=1  # Only one instance of this specific job can run
     )
 
     logger.info(f"Scheduled backup job for {repository.name} with trigger: {trigger}")
+
+    # Verify the job was actually added
+    added_job = scheduler.get_job(job_id)
+    if added_job:
+        logger.info(f"Job {job_id} successfully scheduled, next run: {added_job.next_run_time}")
+    else:
+        logger.error(f"Failed to schedule job {job_id}")
+
+# Initialize scheduler with existing repositories at startup
+# This runs after all functions are defined
+try:
+    with app.app_context():
+        logger.info("Starting scheduler initialization at app startup...")
+        ensure_scheduler_initialized()
+        logger.info("Scheduler initialization at startup completed")
+except Exception as e:
+    logger.error(f"Failed to initialize scheduler at startup: {e}")
 
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=8080, debug=False)
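
The add_job() call in the final hunk leans on APScheduler's own safeguards in addition to the application-level checks: replace_existing overwrites a leftover job with the same id, misfire_grace_time bounds how late a run may start, coalesce collapses a backlog of missed runs into one, and max_instances=1 prevents overlapping executions of the same job. A minimal standalone sketch of those options (the job id, interval, and run_backup body are illustrative, not taken from the patch):

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger

scheduler = BackgroundScheduler()
scheduler.start()

def run_backup():
    # Stand-in for the real backup call
    print("backup tick")

job_id = 'backup_example'
scheduler.add_job(
    run_backup,
    trigger=IntervalTrigger(hours=1),
    id=job_id,
    name='Backup example',
    replace_existing=True,   # overwrite any leftover job with the same id
    misfire_grace_time=60,   # run at most 60 s late, otherwise skip that run
    coalesce=True,           # merge a backlog of missed runs into a single run
    max_instances=1,         # never run two instances of this job concurrently
)

# Confirm the job was registered and inspect its next fire time
job = scheduler.get_job(job_id)
print(job.id, job.next_run_time)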