Skip to content

Commit 43a69f0

Browse files
committed
short-circuit SSH check; linear backoff
1 parent 9a9e035 commit 43a69f0

File tree

1 file changed

+20
-9
lines changed

1 file changed

+20
-9
lines changed

ec2/spark_ec2.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def parse_args():
6363
help="Number of slaves to launch (default: %default)")
6464
parser.add_option(
6565
"-w", "--wait", type="int",
66-
help="DEPRECATED - Seconds to wait for nodes to start")
66+
help="DEPRECATED (no longer necessary) - Seconds to wait for nodes to start")
6767
parser.add_option(
6868
"-k", "--key-pair",
6969
help="Key pair to use on instances")
@@ -602,11 +602,8 @@ def is_ssh_available(host, opts):
602602
try:
603603
with open(os.devnull, 'w') as devnull:
604604
ret = subprocess.check_call(
605-
ssh_command(opts) +
606-
['-t', '-t',
607-
'-o', 'ConnectTimeout=3',
608-
'%s@%s' % (opts.user, host),
609-
stringify_command('true')],
605+
ssh_command(opts) + ['-t', '-t', '-o', 'ConnectTimeout=3',
606+
'%s@%s' % (opts.user, host), stringify_command('true')],
610607
stdout=devnull,
611608
stderr=devnull
612609
)
@@ -615,6 +612,14 @@ def is_ssh_available(host, opts):
615612
return False
616613

617614

615+
def is_cluster_ssh_available(cluster_instances, opts):
    """
    Check whether every instance in the cluster accepts SSH connections.

    cluster_instances: an iterable of instance objects exposing an
        `ip_address` attribute (presumably boto.ec2.instance.Instance
        objects, as in wait_for_cluster_state -- confirm against callers)
    opts: parsed command-line options, passed through to is_ssh_available

    Returns True if is_ssh_available() succeeds for every instance (and
    trivially True when there are no instances), False otherwise.
    """
    # all() over a generator stops at the first host that is not reachable,
    # so no further SSH probes are made once one instance fails.
    return all(
        is_ssh_available(host=i.ip_address, opts=opts)
        for i in cluster_instances
    )
621+
622+
618623
def wait_for_cluster_state(cluster_instances, cluster_state, opts):
619624
"""
620625
cluster_instances: a list of boto.ec2.instance.Instance
@@ -628,20 +633,26 @@ def wait_for_cluster_state(cluster_instances, cluster_state, opts):
628633
)
629634
sys.stdout.flush()
630635

636+
num_attempts = 0
637+
631638
while True:
639+
time.sleep(3 * num_attempts)
640+
632641
for i in cluster_instances:
633642
s = i.update() # capture output to suppress print to screen in newer versions of boto
634-
# print "{instance}: {state}".format(instance=i.id, state=i.state)
643+
635644
if cluster_state == 'ssh-ready':
636645
if all(i.state == 'running' for i in cluster_instances) and \
637-
all(is_ssh_available(host=i.ip_address, opts=opts) for i in cluster_instances):
646+
is_cluster_ssh_available(cluster_instances, opts):
638647
break
639648
else:
640649
if all(i.state == cluster_state for i in cluster_instances):
641650
break
651+
652+
num_attempts += 1
653+
642654
sys.stdout.write(".")
643655
sys.stdout.flush()
644-
time.sleep(3)
645656

646657
sys.stdout.write("\n")
647658

0 commit comments

Comments
 (0)