Skip to content

Commit 597f3fd

Browse files
author
Marcelo RS Soares
committed
Merge branch 'master' of github.com:globocom/database-as-a-service
2 parents 8e57cf2 + 6147d99 commit 597f3fd

File tree

18 files changed

+1638
-63
lines changed

18 files changed

+1638
-63
lines changed

dbaas/backup/tasks.py

Lines changed: 170 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,12 @@ def unlock_instance(driver, instance, client):
8686
return False
8787

8888

89-
def make_instance_snapshot_backup(instance, error, group,
90-
provider_class=VolumeProviderSnapshot,
91-
target_volume=None,
92-
current_hour=None,
93-
task=None,
94-
persist=0):
89+
def make_instance_dccm_snapshot_backup(instance, error, group,
90+
provider_class=VolumeProviderSnapshot,
91+
target_volume=None,
92+
current_hour=None,
93+
task=None,
94+
persist=0):
9595
LOG.info("Make instance backup for {}".format(instance))
9696
provider = provider_class(instance)
9797
infra = instance.databaseinfra
@@ -212,6 +212,169 @@ def make_instance_snapshot_backup(instance, error, group,
212212
return snapshot
213213

214214

215+
def make_instance_gcp_snapshot_backup(
    instance, error, group, provider_class=VolumeProviderSnapshot, target_volume=None,
    current_hour=None, task=None, persist=0
):
    """Take a snapshot backup of ``instance`` through ``provider_class``.

    Steps, in order: create the ``Snapshot`` record, lock the instance,
    request the snapshot (retrying up to the ``backup_retry_attempts``
    configuration), poll the snapshot status until the provider answers 200,
    unlock the instance, fill in the snapshot size when it is missing,
    optionally copy the snapshot data to the database ``backup_path`` and
    finally call ``register_backup_dbmonitor``.

    Args:
        instance: instance being backed up.
        error: dict owned by the caller; ``error['errormsg']`` is filled in
            when the snapshot fails.
        group: backup group forwarded to ``Snapshot.create``.
        provider_class: class instantiated with ``instance`` to talk to the
            volume provider.
        target_volume: volume to snapshot; defaults to ``provider.volume``.
        current_hour: hour of the current run, compared (as a string) with
            the ``make_database_backup_hour`` configuration.
        task: optional task object that receives retry details through
            ``add_detail``.
        persist: non-zero marks the snapshot as persistent.

    Returns:
        The ``Snapshot`` record. When any step inside the locked section
        raises, the record is passed to ``set_backup_error`` and returned
        early.
    """
    LOG.info("Make instance backup for {}".format(instance))
    provider = provider_class(instance)
    infra = instance.databaseinfra
    database = infra.databases.first()

    backup_retry_attempts = Configuration.get_by_name_as_int('backup_retry_attempts', default=3)

    snapshot = Snapshot.create(
        instance, group, target_volume or provider.volume,
        environment=provider.environment, persistent=persist != 0
    )

    snapshot_final_status = Snapshot.SUCCESS

    client = None
    driver = infra.get_driver()
    try:
        client = driver.get_client(instance)
        locked = lock_instance(driver, instance, client)
        if not locked:
            snapshot_final_status = Snapshot.WARNING

        if 'MySQL' in type(driver).__name__:
            mysql_binlog_save(client, instance)

        # Read the clock once so year/month/day cannot straddle midnight.
        today = datetime.now()
        has_snapshot = Snapshot.objects.filter(
            status=Snapshot.WARNING, instance=instance, end_at__year=today.year,
            end_at__month=today.month, end_at__day=today.day
        )
        backup_hour_list = Configuration.get_by_name_as_list('make_database_backup_hour')
        if snapshot_final_status != Snapshot.WARNING and not has_snapshot:
            response = data = None
            for attempt in range(1, backup_retry_attempts + 1):
                try:
                    response, data = provider.new_take_snapshot(persist=persist)
                except IndexError as e:
                    # NOTE(review): presumably the provider may signal an HTTP
                    # failure by raising IndexError(response, content), as the
                    # original handler unpacked it that way -- confirm against
                    # the provider implementation.
                    if len(e.args) != 2:
                        raise
                    response = e.args[0]
                    if response.status_code != 503:
                        raise
                    errormsg = "{} - 503 error creating snapshot for instance: {}. It will try again in 30 seconds. ".format(
                        strftime("%d/%m/%Y %H:%M:%S"), instance
                    )
                    LOG.error(errormsg)
                    if task:
                        task.add_detail(errormsg)
                    sleep(30)
                    continue

                if response.status_code < 400:
                    break

                # Honour the configured number of attempts (it used to be a
                # hard-coded 3) and fail with a message that names the status.
                if attempt >= backup_retry_attempts:
                    raise Exception(
                        "Could not create snapshot for instance: {}. "
                        "Status code: {} after {} attempts".format(
                            instance, response.status_code, attempt
                        )
                    )

            if response.status_code < 400:
                # NOTE(review): polling has no upper bound; a provider that
                # never answers 200 or >= 400 keeps this loop running.
                while True:
                    sleep(20)
                    snap_response, snap_status = provider.take_snapshot_status(data['identifier'])
                    status_code = snap_response.status_code
                    if status_code in (200, 202):
                        # The instance is unlocked as soon as the provider
                        # answers 200/202, before the final unlock below.
                        unlock_instance(driver, instance, client)
                    if status_code == 200:
                        break
                    if status_code >= 400:
                        # `error` is the caller's dict and cannot be raised;
                        # raise a real exception carrying the status instead.
                        raise Exception(
                            "Error checking snapshot status for instance: {}. "
                            "Status code: {}".format(instance, status_code)
                        )

                snapshot.done(snap_status)
                snapshot.save()
            else:
                # NOTE(review): `response` is used as an object with
                # `status_code` above but subscripted here -- confirm which
                # value actually carries the provider message.
                errormsg = response['message']
                set_backup_error(infra, snapshot, errormsg)
        else:
            if str(current_hour) in backup_hour_list:
                raise Exception("Backup with WARNING already created today.")

    except Exception as e:
        errormsg = "Error creating snapshot: {}".format(e)
        error['errormsg'] = errormsg
        set_backup_error(infra, snapshot, errormsg)
        return snapshot
    finally:
        unlock_instance(driver, instance, client)

    if not snapshot.size:
        command = "du -sb /data/.snapshot/%s | awk '{print $1}'" % (
            snapshot.snapshot_name
        )
        try:
            output = instance.hostname.ssh.run_script(command)
            size = int(output['stdout'][0])
            snapshot.size = size
        except Exception as e:
            # Best effort: a missing size must not fail the backup.
            snapshot.size = 0
            LOG.error("Error exec remote command {}".format(e))

    # `first()` returns None when the infra has no database yet; skip the
    # copy instead of crashing after the snapshot was already taken.
    backup_path = database.backup_path if database else None
    if backup_path:
        now = datetime.now()
        target_path = "{}/{}/{}/{}/{}".format(
            backup_path,
            now.strftime("%Y_%m_%d"),
            instance.hostname.hostname.split('.')[0],
            now.strftime("%Y%m%d%H%M%S"),
            infra.name
        )
        snapshot_path = "/data/.snapshot/{}/data/".format(
            snapshot.snapshot_name
        )
        command = """
        if [ -d "{backup_path}" ]
        then
        rm -rf {backup_path}/20[0-9][0-9]_[0-1][0-9]_[0-3][0-9] &
        mkdir -p {target_path}
        cp -r {snapshot_path} {target_path} &
        fi
        """.format(backup_path=backup_path,
                   target_path=target_path,
                   snapshot_path=snapshot_path)
        try:
            instance.hostname.ssh.run_script(command)
        except Exception as e:
            LOG.error("Error exec remote command {}".format(e))

    snapshot.status = snapshot_final_status
    snapshot.end_at = datetime.now()
    snapshot.save()
    register_backup_dbmonitor(infra, snapshot)

    return snapshot
352+
353+
354+
def make_instance_snapshot_backup(instance, error, group,
                                  provider_class=VolumeProviderSnapshot,
                                  target_volume=None,
                                  current_hour=None,
                                  task=None,
                                  persist=0):
    """Route the snapshot backup to the implementation for the environment.

    Instances whose infra environment is named ``'prod'`` go through
    ``make_instance_dccm_snapshot_backup``; every other environment goes
    through ``make_instance_gcp_snapshot_backup``. All arguments are
    forwarded unchanged and the chosen function's result is returned.
    """
    environment_name = instance.databaseinfra.environment.name
    if environment_name == 'prod':
        backup_maker = make_instance_dccm_snapshot_backup
    else:
        backup_maker = make_instance_gcp_snapshot_backup

    return backup_maker(
        instance, error, group,
        provider_class=provider_class,
        target_volume=target_volume,
        current_hour=current_hour,
        task=task,
        persist=persist,
    )
376+
377+
215378
def make_instance_snapshot_backup_upgrade_disk(instance, error, group, provider_class=VolumeProviderSnapshot,
216379
target_volume=None,
217380
current_hour=None):
@@ -742,6 +905,7 @@ def _create_database_backup(instance, task, group, current_hour, persist):
742905

743906
error = {}
744907
try:
908+
LOG.info('Starting make database snapshot')
745909
snapshot = make_instance_snapshot_backup(
746910
instance=instance,
747911
error=error,

dbaas/dbaas_services/analyzing/views.py

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
import logging
2121

2222

23+
LOG = logging.getLogger(__name__)
24+
25+
2326
class SubUsedResourceReport(ListView):
2427
def get(self, request, *args, **kwargs):
2528
reports = (models.AnalyzeRepository.objects.all()
@@ -87,18 +90,18 @@ def vm_by_line_database_report(self):
8790
database.attention_description,
8891
instance.hostname.hostname.encode("utf-8"),
8992
database.environment,
90-
database.team,
91-
database.team.name,
92-
database.team.team_area,
93-
database.team.email,
94-
database.team.contacts,
95-
database.team.organization.name,
93+
self._check_values(database, 'team'),
94+
self._check_values(database, 'team_name'),
95+
self._check_values(database, 'team_area'),
96+
self._check_values(database, 'team_email'),
97+
self._check_values(database, 'team_contacts'),
98+
self._check_values(database, 'team_organization'),
9699
database.created_at,
97100
database.is_in_quarantine,
98101
database.apps_bind_name,
99-
database.infra.offering.cpus,
100-
database.infra.offering.memory_size_mb,
101-
database.infra.disk_offering.size_gb(),
102+
self._check_values(database, 'cpu'),
103+
self._check_values(database, 'memory_size'),
104+
self._check_values(database, 'disk_size'),
102105
database.engine_type
103106
]
104107
writer.writerow(data)
@@ -131,20 +134,75 @@ def default_database_report(self):
131134
database.attention_description,
132135
hostname,
133136
database.environment,
134-
database.team,
135-
database.team.name,
136-
database.team.team_area,
137-
database.team.email,
138-
database.team.contacts,
139-
database.team.organization.name,
137+
self._check_values(database, 'team'),
138+
self._check_values(database, 'team_name'),
139+
self._check_values(database, 'team_area'),
140+
self._check_values(database, 'team_email'),
141+
self._check_values(database, 'team_contacts'),
142+
self._check_values(database, 'team_organization'),
140143
database.created_at,
141144
database.is_in_quarantine,
142145
database.apps_bind_name,
143-
database.infra.offering.cpus,
144-
database.infra.offering.memory_size_mb,
145-
database.infra.disk_offering.size_gb(),
146+
self._check_values(database, 'cpu'),
147+
self._check_values(database, 'memory_size'),
148+
self._check_values(database, 'disk_size'),
146149
database.engine_type
147150
]
148151
writer.writerow(data)
149152

150-
return response
153+
return response
154+
155+
def _check_values(self, database, attr):
    """Return one report column for ``database``, tolerating missing data.

    ``attr`` selects the column: ``team``, ``team_name``, ``team_area``,
    ``team_email``, ``team_contacts``, ``team_organization``, ``cpu``,
    ``memory_size`` or ``disk_size``. When reading the value raises (for
    example because the team or the offering is missing) the team columns
    fall back to ``''`` and the sizing columns fall back to ``0``.

    Returns ``None`` for an ``attr`` that is not one of the names above.
    """
    # column name -> (how to read it from the database, fallback value)
    columns = {
        'team': (lambda db: db.team, ''),
        'team_name': (lambda db: db.team.name, ''),
        'team_area': (lambda db: db.team.team_area, ''),
        'team_email': (lambda db: db.team.email, ''),
        'team_contacts': (lambda db: db.team.contacts, ''),
        'team_organization': (lambda db: db.team.organization.name, ''),
        'cpu': (lambda db: db.infra.offering.cpus, 0),
        'memory_size': (lambda db: db.infra.offering.memory_size_mb, 0),
        'disk_size': (lambda db: db.infra.disk_offering.size_gb(), 0),
    }
    if attr not in columns:
        return None

    read_value, fallback = columns[attr]
    try:
        return read_value(database)
    except Exception:
        # A broken relation must not abort the whole report; unlike a bare
        # `except:` this still lets KeyboardInterrupt/SystemExit propagate.
        return fallback

dbaas/drivers/replication_topologies/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,9 @@ def get_stop_database_vm_steps(self):
732732
'workflow.steps.util.host_provider.StopIfRunning',
733733
)
734734
}]
735+
736+
def get_auto_upgrade_database_vm_offering(self):
    """Placeholder step list: this implementation always raises.

    Raises:
        NotImplementedError: on every call.
    """
    message = 'Not implemented for topology'
    raise NotImplementedError(message)
735738

736739
def get_start_database_vm_steps(self):
737740
return [{

dbaas/drivers/replication_topologies/mysql.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,22 +1065,30 @@ def get_update_ssl_steps(self):
10651065
'Disable monitoring and alarms': (
10661066
'workflow.steps.util.zabbix.DisableAlarms',
10671067
'workflow.steps.util.db_monitor.DisableMonitoring',
1068-
'workflow.steps.util.ssl.UpdateExpireAtDateRollback',
1069-
'workflow.steps.util.ssl.BackupSSLFolder',
10701068
),
10711069
}] + [{
10721070
'Disable SSL': (
10731071
'workflow.steps.util.ssl.UnSetReplicationUserRequireSSL',
10741072
),
10751073
}] + [{
10761074
'Configure SSL': (
1077-
'workflow.steps.util.ssl.UpdateSSLForInfra',
1078-
'workflow.steps.util.ssl.UpdateSSLForInstance',
1075+
'workflow.steps.util.ssl.UpdateExpireAtDateRollback',
1076+
'workflow.steps.util.ssl.MoveSSLFolder',
1077+
'workflow.steps.util.ssl.UpdateOpenSSlLib',
1078+
'workflow.steps.util.ssl.CreateSSLFolder',
1079+
'workflow.steps.util.ssl.CreateSSLConfForInfraEndPoint',
1080+
'workflow.steps.util.ssl.CreateSSLConfForInstanceIP',
1081+
'workflow.steps.util.ssl.RequestSSLForInfra',
1082+
'workflow.steps.util.ssl.RequestSSLForInstance',
10791083
'workflow.steps.util.ssl.CreateJsonRequestFileInfra',
10801084
'workflow.steps.util.ssl.CreateJsonRequestFileInstance',
10811085
'workflow.steps.util.ssl.CreateCertificateInfra',
10821086
'workflow.steps.util.ssl.CreateCertificateInstance',
10831087
'workflow.steps.util.ssl.SetSSLFilesAccessMySQL',
1088+
'workflow.steps.util.ssl.SetInfraConfiguredSSL',
1089+
'workflow.steps.util.plan.Configure',
1090+
'workflow.steps.util.plan.ConfigureLog',
1091+
'workflow.steps.util.metric_collector.ConfigureTelegraf',
10841092
'workflow.steps.util.ssl.UpdateExpireAtDate',
10851093
),
10861094
}] + [{
@@ -1089,7 +1097,7 @@ def get_update_ssl_steps(self):
10891097
'workflow.steps.util.vm.ChangeMaster',
10901098
'workflow.steps.util.database.CheckIfSwitchMaster',
10911099
'workflow.steps.util.database.Stop',
1092-
'workflow.steps.util.ssl.RestoreSSLFolder4Rollback',
1100+
# 'workflow.steps.util.ssl.RestoreSSLFolder4Rollback',
10931101
'workflow.steps.util.database.Start',
10941102
'workflow.steps.util.metric_collector.RestartTelegraf',
10951103
'workflow.steps.util.database.CheckIfSwitchMasterRollback',
@@ -1103,6 +1111,7 @@ def get_update_ssl_steps(self):
11031111
),
11041112
}] + [{
11051113
'Enabling monitoring and alarms': (
1114+
'workflow.steps.util.db_monitor.UpdateInfraSSLMonitor',
11061115
'workflow.steps.util.db_monitor.EnableMonitoring',
11071116
'workflow.steps.util.zabbix.EnableAlarms',
11081117
),

0 commit comments

Comments
 (0)