Skip to content

Commit 4229a9a

Browse files
author
Marcelo Rodrigues Da Silva Soares
committed
Merge branch 'autoupgrade-wait-changemaster' into 'dev'
AutoUpgrade - Change master modifications See merge request dbdev/dbaas!8
2 parents 448991e + d3767a7 commit 4229a9a

File tree

3 files changed

+77
-19
lines changed

3 files changed

+77
-19
lines changed

dbaas/drivers/base.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,9 @@ def is_replication_ok(self, instance):
253253
def switch_master(self, instance=None, preferred_slave_instance=None):
    """Hand the master role over to another instance.

    Placeholder in the base driver: it always raises, so a concrete driver
    has to provide its own implementation.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
255255

256+
def switch_master_with_stepdowntime(self, instance=None, preferred_slave_instance=None, stepdown_time=60):
    """Hand the master role over, passing an explicit step-down time.

    Placeholder in the base driver: it always raises, so a concrete driver
    has to provide its own implementation.

    Args:
        instance: instance the switch is requested for.
        preferred_slave_instance: instance that should preferably take over.
        stepdown_time: step-down time in seconds (see the log message in
            check_replication_and_switch_with_stepdown_time).

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
258+
256259
def get_database_instances(self, ):
257260
driver_name = self.name.upper()
258261
instances = [instance
@@ -369,6 +372,35 @@ def check_replication_and_switch(self, instance, attempts=100,
369372
"Could not switch master because of replication's delay"
370373
)
371374

375+
def check_replication_and_switch_with_stepdown_time(self, instance, attempts=100,
                                                    check_is_master_attempts=5,
                                                    preferred_slave_instance=None,
                                                    stepdown_time=60):
    """Wait for healthy replication, switch master, then confirm the switch.

    Polls ``is_replication_ok(instance)`` up to ``attempts`` times, sleeping
    10 seconds after each failed check. On the first successful check it
    calls ``switch_master_with_stepdowntime`` and then polls
    ``check_instance_is_master(instance, default_timeout=False)`` until
    ``instance`` is no longer reported as master.

    Args:
        instance: instance whose replication is checked and which must stop
            being master.
        attempts: maximum number of replication checks.
        check_is_master_attempts: number of 10-second waits allowed while
            ``instance`` is still reported as master (so at most
            ``check_is_master_attempts + 1`` checks). Negative values are
            treated as 0.
        preferred_slave_instance: forwarded to the switch call.
        stepdown_time: step-down time in seconds, forwarded to the switch call.

    Raises:
        Exception: "Could not change master" if ``instance`` is still master
            after the switch was requested and all checks were used;
            "Could not switch master because of replication's delay" if
            replication never became ok within ``attempts`` checks.
    """
    from time import sleep

    LOG.info("Check Replication with StepDown time of %s seconds", stepdown_time)
    for _ in range(attempts):
        if not self.is_replication_ok(instance):
            LOG.info("Waiting 10s to check replication...")
            sleep(10)
            continue

        self.switch_master_with_stepdowntime(
            instance, preferred_slave_instance, stepdown_time
        )
        LOG.info("Switch master returned ok...")

        # One check up front plus one after each allowed wait. A bounded
        # range (instead of a counter compared with == 0) guarantees that a
        # negative argument cannot turn this into an endless loop.
        for waits_left in range(max(check_is_master_attempts, 0), -1, -1):
            if not self.check_instance_is_master(instance, default_timeout=False):
                return
            if waits_left:
                sleep(10)

        # The switch was requested only once; if the instance is still master
        # the whole operation is reported as failed rather than retried here.
        raise Exception("Could not change master")

    raise Exception(
        "Could not switch master because of replication's delay"
    )
403+
372404
def get_database_agents(self):
    """Return the list of database agents.

    Placeholder in the base driver: it always raises, so a concrete driver
    has to provide its own implementation.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()

dbaas/drivers/mongodb.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,16 @@ def switch_master(self, instance=None, preferred_slave_instance=None):
546546
except pymongo.errors.AutoReconnect:
547547
pass
548548

549+
def switch_master_with_stepdowntime(self, instance=None, preferred_slave_instance=None, stepdown_time=60):
    """Ask the replica set primary to step down for ``stepdown_time`` seconds.

    Sends ``replSetStepDown`` through the client returned by
    ``self.get_client(None)``. ``instance`` and ``preferred_slave_instance``
    are accepted for signature compatibility with the base driver and are
    not used here.

    Args:
        instance: unused.
        preferred_slave_instance: unused.
        stepdown_time: value sent as the ``replSetStepDown`` period, in
            seconds.
    """
    # MongoDB rejects the command unless the step-down period is strictly
    # longer than secondaryCatchUpPeriodSecs. Keep the 60s catch-up used for
    # long step-downs, but shrink it when stepdown_time is 60 or less
    # (the default) so the command stays valid.
    catch_up_secs = max(0, min(60, stepdown_time - 1))

    client = self.get_client(None)
    try:
        client.admin.command(
            'replSetStepDown', stepdown_time,
            secondaryCatchUpPeriodSecs=catch_up_secs
        )
    except pymongo.errors.AutoReconnect:
        # Deliberately ignored: presumably the primary drops the connection
        # while stepping down, so the reconnect error signals the expected
        # outcome rather than a failure -- TODO confirm against the server
        # version in use.
        pass
558+
549559
def get_database_agents(self):
    """Return the database agents for this driver: always an empty list."""
    return []
551561

dbaas/workflow/steps/util/vm.py

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# -*- coding: utf-8 -*-
2+
import logging
23
from time import sleep
34
from dbaas_credentials.models import CredentialType
45
from util import get_credentials_for
@@ -7,6 +8,8 @@
78
CHANGE_MASTER_ATTEMPS = 30
89
CHANGE_MASTER_SECONDS = 15
910

11+
LOG = logging.getLogger(__name__)
12+
1013

1114
class HostStatus(object):
1215
@staticmethod
@@ -159,33 +162,47 @@ class ChangeMasterTemporaryInstance(ChangeMaster):
159162

160163
@property
def is_valid(self):
    """Tell whether this step applies to the current instance.

    Returns:
        True only when ``self.instance`` is temporary and
        ``check_master_is_temporary()`` reports that the current master is
        not temporary; False otherwise.
    """
    # Only runs for the temporary VM, and only if the master is not temporary.
    # The cheap local flag is tested first so the driver is not queried for
    # the master when the answer is already known to be False.
    if not self.instance.temporary:
        return False

    return not self.check_master_is_temporary()
171+
172+
def check_master_is_temporary(self, wait_seconds=0):
    """Report whether the current master is a temporary instance.

    Args:
        wait_seconds: seconds to sleep before asking the driver for the
            master instance.

    Returns:
        True if ``self.driver.get_master_instance()`` returns an instance
        whose ``temporary`` attribute is truthy; False if it returns None or
        a non-temporary instance.
    """
    LOG.info("Checking master is temporary instance")
    LOG.debug("Willl sleep for %s seconds before checking", wait_seconds)
    sleep(wait_seconds)

    master = self.driver.get_master_instance()
    LOG.info("Master instance is %s", master)

    # Guard before touching master.temporary: logging the attribute first
    # would raise AttributeError whenever no master is returned.
    if master is None:
        return False

    LOG.info("Master is temporary? %s", master.temporary)
    return bool(master.temporary)
171185

172186
def change_master(self):
    """Switch master until the temporary instance holds the master role.

    Makes up to CHANGE_MASTER_ATTEMPS attempts. Each attempt asks the driver
    to check replication and switch master with a 300-second step-down time,
    then waits 60 seconds and verifies via ``check_master_is_temporary`` that
    the master is now temporary. Any exception in an attempt is remembered
    and followed by a CHANGE_MASTER_SECONDS sleep before the next attempt.

    Raises:
        Exception: the error from the last attempt, if no attempt succeeded.
    """
    error = None

    for attempt in range(CHANGE_MASTER_ATTEMPS):
        try:
            LOG.info("Trying to change master. Attempt %s", attempt)
            self.driver.check_replication_and_switch_with_stepdown_time(
                self.target_instance, stepdown_time=300
            )

            if not self.check_master_is_temporary(wait_seconds=60):
                raise Exception('Master is not the temporary instance')

            return
        except Exception as e:
            # Every failure, including the verification failure raised just
            # above, is treated as retryable; only the last one is reported.
            error = e
            sleep(CHANGE_MASTER_SECONDS)

    # error stays None only when the loop body never ran (zero attempts).
    if error is not None:
        raise error
189206

190207
def do(self):
191208
if not self.is_valid:
@@ -206,19 +223,18 @@ def change_master(self):
206223
error = None
207224

208225
for _ in range(CHANGE_MASTER_ATTEMPS):
209-
if self.is_slave:
210-
return
211226
try:
212227
self.driver.check_replication_and_switch(self.target_instance)
213-
if self.check_master_is_temporary():
228+
if self.check_master_is_temporary(wait_seconds=60):
214229
raise Exception('Master is the temporary instance')
230+
231+
return
215232
except Exception as e:
216233
error = e
217234
sleep(CHANGE_MASTER_SECONDS)
218-
else:
219-
return
220235

221-
raise error
236+
if error is not None:
237+
raise error
222238

223239

224240
class ChangeMasterDatabaseMigrate(ChangeMaster):

0 commit comments

Comments
 (0)