Skip to content

Commit 131210a

Browse files
Backup fixes
1 parent 06bc313 commit 131210a

File tree

2 files changed

+113
-41
lines changed

2 files changed

+113
-41
lines changed

app.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@
2424
)
2525
logger = logging.getLogger(__name__)
2626

27-
# Set APScheduler logging level to DEBUG for better debugging
28-
logging.getLogger('apscheduler').setLevel(logging.DEBUG)
27+
# Set APScheduler logging level to WARNING to reduce noise
28+
logging.getLogger('apscheduler').setLevel(logging.WARNING)
29+
30+
# Set timezone
31+
LOCAL_TZ = pytz.timezone(os.environ.get('TZ', 'UTC'))
2932

3033
app = Flask(__name__)
3134
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'dev-secret-key-change-in-production')
@@ -138,8 +141,28 @@ def format_local_time_filter(utc_dt, format_str='%Y-%m-%d %H:%M'):
138141
# Initialize backup service
139142
backup_service = BackupService()
140143

141-
# Initialize scheduler
142-
scheduler = BackgroundScheduler()
144+
# Initialize scheduler with job store to prevent duplicates
145+
from apscheduler.jobstores.memory import MemoryJobStore
146+
from apscheduler.executors.pool import ThreadPoolExecutor
147+
148+
jobstores = {
149+
'default': MemoryJobStore()
150+
}
151+
executors = {
152+
'default': ThreadPoolExecutor(max_workers=2) # Limit concurrent backups
153+
}
154+
job_defaults = {
155+
'coalesce': True, # Combine multiple pending executions of the same job
156+
'max_instances': 1, # Only one instance of a job can run at a time
157+
'misfire_grace_time': 60 # 1 minute grace time for missed jobs
158+
}
159+
160+
scheduler = BackgroundScheduler(
161+
jobstores=jobstores,
162+
executors=executors,
163+
job_defaults=job_defaults,
164+
timezone=LOCAL_TZ
165+
)
143166
scheduler.start()
144167
atexit.register(lambda: scheduler.shutdown())
145168

@@ -579,10 +602,21 @@ def backup_with_context():
579602
try:
580603
# Refresh the repository object to ensure it's bound to the current session
581604
repo = Repository.query.get(repository.id)
582-
if repo and repo.is_active:
583-
backup_service.backup_repository(repo)
584-
else:
605+
if not repo or not repo.is_active:
585606
logger.warning(f"Repository {repository.id} not found or inactive, skipping backup")
607+
return
608+
609+
# Check if there's already a running backup for this repository
610+
running_job = BackupJob.query.filter_by(
611+
repository_id=repository.id,
612+
status='running'
613+
).first()
614+
615+
if running_job:
616+
logger.warning(f"Backup already running for repository {repo.name}, skipping")
617+
return
618+
619+
backup_service.backup_repository(repo)
586620
except Exception as e:
587621
logger.error(f"Error in scheduled backup for repository {repository.id}: {e}")
588622

@@ -654,12 +688,17 @@ def backup_with_context():
654688
return # Manual only
655689

656690
# Register the scheduled backup job for this repository under `job_id`.
# NOTE: `replace_existing` must be passed exactly once. Repeating a keyword
# argument in a call is a SyntaxError ("keyword argument repeated") that
# prevents the whole module from being imported.
scheduler.add_job(
    func=backup_with_context,
    trigger=trigger,
    id=job_id,
    name=f'Backup {repository.name}',
    replace_existing=True,  # Re-scheduling the same id replaces the old job
    misfire_grace_time=300,  # 5 minutes grace time
    coalesce=True,  # Combine multiple pending executions
)
700+
701+
logger.info(f"Scheduled backup job for {repository.name} with trigger: {trigger}")
663702

664703
if __name__ == '__main__':
665704
app.run(host='0.0.0.0', port=8080, debug=False)

backup_service.py

Lines changed: 66 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def backup_repository(self, repository):
3131
db.session.add(backup_job)
3232
db.session.commit()
3333

34+
temp_clone_dir = None
3435
try:
3536
# Create user-specific backup directory
3637
user_backup_dir = self.backup_base_dir / f"user_{repository.user_id}"
@@ -44,39 +45,39 @@ def backup_repository(self, repository):
4445
timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
4546
backup_name = f"{repository.name}_{timestamp}"
4647

47-
# Clone repository to temporary location
48+
# Create unique temporary directory and ensure it's clean
4849
temp_clone_dir = repo_backup_dir / f"temp_{timestamp}"
50+
51+
# Remove temp directory if it already exists
52+
if temp_clone_dir.exists():
53+
logger.warning(f"Temp directory already exists, removing: {temp_clone_dir}")
54+
shutil.rmtree(temp_clone_dir)
55+
4956
temp_clone_dir.mkdir(exist_ok=True)
5057

51-
try:
52-
self._clone_repository(repository, temp_clone_dir)
53-
54-
# Create backup in specified format
55-
backup_path = self._create_backup(
56-
temp_clone_dir,
57-
repo_backup_dir,
58-
backup_name,
59-
repository.backup_format
60-
)
61-
62-
# Clean up old backups based on retention policy
63-
self._cleanup_old_backups(repo_backup_dir, repository.retention_count, repository.backup_format)
64-
65-
# Update backup job record
66-
backup_job.status = 'completed'
67-
backup_job.backup_path = str(backup_path)
68-
backup_job.file_size = self._get_file_size(backup_path)
69-
backup_job.completed_at = datetime.utcnow()
70-
71-
# Update repository last backup time
72-
repository.last_backup = datetime.utcnow()
73-
74-
logger.info(f"Backup completed successfully: {backup_path}")
75-
76-
finally:
77-
# Always clean up temporary directory
78-
if temp_clone_dir.exists():
79-
shutil.rmtree(temp_clone_dir)
58+
self._clone_repository(repository, temp_clone_dir)
59+
60+
# Create backup in specified format
61+
backup_path = self._create_backup(
62+
temp_clone_dir,
63+
repo_backup_dir,
64+
backup_name,
65+
repository.backup_format
66+
)
67+
68+
# Clean up old backups based on retention policy
69+
self._cleanup_old_backups(repo_backup_dir, repository.retention_count, repository.backup_format)
70+
71+
# Update backup job record
72+
backup_job.status = 'completed'
73+
backup_job.backup_path = str(backup_path)
74+
backup_job.file_size = self._get_file_size(backup_path)
75+
backup_job.completed_at = datetime.utcnow()
76+
77+
# Update repository last backup time
78+
repository.last_backup = datetime.utcnow()
79+
80+
logger.info(f"Backup completed successfully: {backup_path}")
8081

8182
except Exception as e:
8283
logger.error(f"Backup failed for repository {repository.name}: {str(e)}")
@@ -85,6 +86,14 @@ def backup_repository(self, repository):
8586
backup_job.completed_at = datetime.utcnow()
8687

8788
finally:
89+
# Always clean up temporary directory
90+
if temp_clone_dir and temp_clone_dir.exists():
91+
try:
92+
logger.info(f"Cleaning up temporary directory: {temp_clone_dir}")
93+
shutil.rmtree(temp_clone_dir)
94+
except Exception as cleanup_error:
95+
logger.error(f"Failed to cleanup temp directory {temp_clone_dir}: {cleanup_error}")
96+
8897
db.session.commit()
8998

9099
def _clone_repository(self, repository, clone_dir):
@@ -97,9 +106,33 @@ def _clone_repository(self, repository, clone_dir):
97106
# Convert https://github.com/user/repo to https://<token>@github.com/user/repo
98107
clone_url = clone_url.replace('https://github.com/', f'https://{repository.github_token}@github.com/')
99108

100-
# Clone the repository
101-
git.Repo.clone_from(clone_url, clone_dir, depth=1)
102-
logger.info(f"Repository cloned to: {clone_dir}")
109+
# Clean up any existing temp directories for this repository first
110+
self._cleanup_temp_directories(clone_dir.parent)
111+
112+
# Clone the repository with error handling
113+
try:
114+
git.Repo.clone_from(clone_url, clone_dir, depth=1)
115+
logger.info(f"Repository cloned to: {clone_dir}")
116+
except git.GitCommandError as e:
117+
if "already exists and is not an empty directory" in str(e):
118+
logger.warning(f"Directory exists, cleaning and retrying: {clone_dir}")
119+
shutil.rmtree(clone_dir)
120+
clone_dir.mkdir(exist_ok=True)
121+
git.Repo.clone_from(clone_url, clone_dir, depth=1)
122+
else:
123+
raise e
124+
125+
def _cleanup_temp_directories(self, repo_backup_dir, max_age_seconds=3600):
    """Remove stale ``temp_*`` directories left behind in *repo_backup_dir*.

    A directory is considered stale when its modification time is more than
    *max_age_seconds* in the past. This is a best-effort cleanup: every
    failure is logged as a warning and never propagated to the caller.

    Args:
        repo_backup_dir: ``pathlib.Path`` of the directory to scan.
        max_age_seconds: Minimum age, in seconds, before a temp directory
            is removed. Defaults to 3600 (one hour).
    """
    # Local import so this method does not depend on the file's import block.
    import time

    try:
        temp_dirs = [
            entry for entry in repo_backup_dir.iterdir()
            if entry.is_dir() and entry.name.startswith('temp_')
        ]
    except Exception as e:
        logger.warning(f"Failed to cleanup temp directories: {e}")
        return

    # st_mtime is seconds since the epoch, so compare against time.time().
    # datetime.utcnow().timestamp() would interpret the naive UTC value as
    # local time and skew the age by the host's UTC offset.
    now = time.time()

    for temp_dir in temp_dirs:
        # Handle each directory separately so that one failure (e.g. a
        # directory removed concurrently) does not stop the others from
        # being cleaned up.
        try:
            if now - temp_dir.stat().st_mtime > max_age_seconds:
                logger.info(f"Cleaning up old temp directory: {temp_dir}")
                shutil.rmtree(temp_dir)
        except Exception as e:
            logger.warning(f"Failed to cleanup temp directory {temp_dir}: {e}")
103136

104137
def _create_backup(self, source_dir, backup_dir, backup_name, backup_format):
105138
"""Create backup in the specified format"""

0 commit comments

Comments
 (0)