Skip to content

Commit 46cbf72

Browse files
committed
Nudge kernel with info requests
1 parent 0c83c9d commit 46cbf72

File tree

3 files changed, with 119 additions and 21 deletions.

3 files changed, with 119 additions and 21 deletions.

notebook/gateway/managers.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -454,7 +454,7 @@ def shutdown_kernel(self, kernel_id, now=False, restart=False):
454454
self.remove_kernel(kernel_id)
455455

456456
@gen.coroutine
457-
def restart_kernel(self, kernel_id, now=False, **kwargs):
457+
def restart_kernel(self, kernel_id, channels=None, now=False, **kwargs):
458458
"""Restart a kernel by its kernel uuid.
459459
460460
Parameters

notebook/services/kernels/handlers.py

Lines changed: 71 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,6 @@ def post(self, kernel_id, action):
7878
yield maybe_future(km.interrupt_kernel(kernel_id))
7979
self.set_status(204)
8080
if action == 'restart':
81-
8281
try:
8382
yield maybe_future(km.restart_kernel(kernel_id))
8483
except Exception as e:
@@ -121,12 +120,71 @@ def __repr__(self):
121120
return "%s(%s)" % (self.__class__.__name__, getattr(self, 'kernel_id', 'uninitialized'))
122121

123122
def create_stream(self):
123+
self.log.debug("Create stream")
124124
km = self.kernel_manager
125125
identity = self.session.bsession
126126
for channel in ('shell', 'control', 'iopub', 'stdin'):
127127
meth = getattr(km, 'connect_' + channel)
128128
self.channels[channel] = stream = meth(self.kernel_id, identity=identity)
129129
stream.channel = channel
130+
131+
shell_channel = self.channels['shell']
132+
iopub_channel = self.channels['iopub']
133+
134+
future = Future()
135+
info_future = Future()
136+
iopub_future = Future()
137+
138+
def finish():
139+
"""Common cleanup"""
140+
loop.remove_timeout(timeout)
141+
loop.remove_timeout(nudge_handle)
142+
iopub_channel.stop_on_recv()
143+
shell_channel.stop_on_recv()
144+
145+
def on_shell_reply(msg):
146+
if not info_future.done():
147+
self.log.debug("Nudge: shell info reply received: %s", self.kernel_id)
148+
shell_channel.stop_on_recv()
149+
self.log.debug("Nudge: resolving shell future")
150+
info_future.set_result(msg)
151+
if iopub_future.done():
152+
finish()
153+
self.log.debug("Nudge: resolving main future in shell handler")
154+
future.set_result(info_future.result())
155+
156+
def on_iopub(msg):
157+
if not iopub_future.done():
158+
self.log.debug("Nudge: first IOPub received: %s", self.kernel_id)
159+
iopub_channel.stop_on_recv()
160+
self.log.debug("Nudge: resolving iopub future")
161+
iopub_future.set_result(None)
162+
if info_future.done():
163+
finish()
164+
self.log.debug("Nudge: resolving main future in iopub handler")
165+
future.set_result(info_future.result())
166+
167+
def on_timeout():
168+
self.log.warning("Nudge: Timeout waiting for kernel_info_reply: %s", self.kernel_id)
169+
finish()
170+
if not future.done():
171+
future.set_exception(TimeoutError("Timeout waiting for nudge"))
172+
173+
iopub_channel.on_recv(on_iopub)
174+
shell_channel.on_recv(on_shell_reply)
175+
loop = IOLoop.current()
176+
177+
# Nudge the kernel with kernel info requests until we get an IOPub message
178+
def nudge():
179+
self.log.debug("Nudge")
180+
if not future.done():
181+
self.log.debug("nudging")
182+
self.session.send(shell_channel, "kernel_info_request")
183+
nudge_handle = loop.call_later(0.5, nudge)
184+
nudge_handle = loop.call_later(0, nudge)
185+
186+
timeout = loop.add_timeout(loop.time() + self.kernel_info_timeout, on_timeout)
187+
return future
130188

131189
def request_kernel_info(self):
132190
"""send a request for kernel_info"""
@@ -193,6 +251,7 @@ def initialize(self):
193251
super().initialize()
194252
self.zmq_stream = None
195253
self.channels = {}
254+
self.kernel_manager.channels = self.channels
196255
self.kernel_id = None
197256
self.kernel_info_channel = None
198257
self._kernel_info_future = Future()
@@ -253,6 +312,7 @@ def _register_session(self):
253312
yield stale_handler.close()
254313
self._open_sessions[self.session_key] = self
255314

315+
@gen.coroutine
256316
def open(self, kernel_id):
257317
super().open()
258318
km = self.kernel_manager
@@ -269,9 +329,11 @@ def open(self, kernel_id):
269329
for channel, msg_list in replay_buffer:
270330
stream = self.channels[channel]
271331
self._on_zmq_reply(stream, msg_list)
332+
connected = Future()
333+
connected.set_result(None)
272334
else:
273335
try:
274-
self.create_stream()
336+
connected = self.create_stream()
275337
except web.HTTPError as e:
276338
self.log.error("Error opening stream: %s", e)
277339
# WebSockets don't respond to traditional error codes so we
@@ -285,8 +347,13 @@ def open(self, kernel_id):
285347
km.add_restart_callback(self.kernel_id, self.on_kernel_restarted)
286348
km.add_restart_callback(self.kernel_id, self.on_restart_failed, 'dead')
287349

288-
for channel, stream in self.channels.items():
289-
stream.on_recv_stream(self._on_zmq_reply)
350+
def subscribe(value):
351+
for channel, stream in self.channels.items():
352+
stream.on_recv_stream(self._on_zmq_reply)
353+
354+
connected.add_done_callback(subscribe)
355+
356+
return connected
290357

291358
def on_message(self, msg):
292359
if not self.channels:

notebook/services/kernels/kernelmanager.py

Lines changed: 47 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -304,33 +304,54 @@ def shutdown_kernel(self, kernel_id, now=False, restart=False):
304304

305305
return self.pinned_superclass.shutdown_kernel(self, kernel_id, now=now, restart=restart)
306306

307-
async def restart_kernel(self, kernel_id, now=False):
307+
async def restart_kernel(self, kernel_id, channels, now=False):
308308
"""Restart a kernel by kernel_id"""
309309
self._check_kernel_id(kernel_id)
310-
await maybe_future(self.pinned_superclass.restart_kernel(self, kernel_id, now=now))
310+
await maybe_future(self.pinned_superclass.restart_kernel(self, kernel_id, channels, now=now))
311311
kernel = self.get_kernel(kernel_id)
312312
# return a Future that will resolve when the kernel has successfully restarted
313-
channel = kernel.connect_shell()
313+
shell_channel = self.channels['shell']
314+
iopub_channel = self.channels['iopub']
315+
314316
future = Future()
317+
info_future = Future()
318+
iopub_future = Future()
315319

316320
def finish():
317-
"""Common cleanup when restart finishes/fails for any reason."""
318-
if not channel.closed():
319-
channel.close()
321+
"""Common cleanup"""
320322
loop.remove_timeout(timeout)
323+
loop.remove_timeout(nudge_handle)
324+
iopub_channel.stop_on_recv()
325+
shell_channel.stop_on_recv()
321326
kernel.remove_restart_callback(on_restart_failed, 'dead')
322327

323-
def on_reply(msg):
324-
self.log.debug("Kernel info reply received: %s", kernel_id)
325-
finish()
326-
if not future.done():
327-
future.set_result(msg)
328+
def on_shell_reply(msg):
329+
if not info_future.done():
330+
self.log.debug("Nudge: shell info reply received: %s", self.kernel_id)
331+
shell_channel.stop_on_recv()
332+
self.log.debug("Nudge: resolving shell future")
333+
info_future.set_result(msg)
334+
if iopub_future.done():
335+
finish()
336+
self.log.debug("Nudge: resolving main future in shell handler")
337+
future.set_result(info_future.result())
338+
339+
def on_iopub(msg):
340+
if not iopub_future.done():
341+
self.log.debug("Nudge: first IOPub received: %s", self.kernel_id)
342+
iopub_channel.stop_on_recv()
343+
self.log.debug("Nudge: resolving iopub future")
344+
iopub_future.set_result(None)
345+
if info_future.done():
346+
finish()
347+
self.log.debug("Nudge: resolving main future in iopub handler")
348+
future.set_result(info_future.result())
328349

329350
def on_timeout():
330-
self.log.warning("Timeout waiting for kernel_info_reply: %s", kernel_id)
351+
self.log.warning("Nudge: Timeout waiting for kernel_info_reply: %s", self.kernel_id)
331352
finish()
332353
if not future.done():
333-
future.set_exception(TimeoutError("Timeout waiting for restart"))
354+
future.set_exception(TimeoutError("Timeout waiting for nudge"))
334355

335356
def on_restart_failed():
336357
self.log.warning("Restarting kernel failed: %s", kernel_id)
@@ -339,10 +360,20 @@ def on_restart_failed():
339360
future.set_exception(RuntimeError("Restart failed"))
340361

341362
kernel.add_restart_callback(on_restart_failed, 'dead')
342-
kernel.session.send(channel, "kernel_info_request")
343-
channel.on_recv(on_reply)
363+
364+
iopub_channel.on_recv(on_iopub)
365+
shell_channel.on_recv(on_shell_reply)
344366
loop = IOLoop.current()
345-
timeout = loop.add_timeout(loop.time() + self.kernel_info_timeout, on_timeout)
367+
368+
# Nudge the kernel with kernel info requests until we get an IOPub message
369+
def nudge():
370+
self.log.debug("Nudge")
371+
if not future.done():
372+
self.log.debug("nudging")
373+
self.session.send(shell_channel, "kernel_info_request")
374+
nudge_handle = loop.call_later(0.5, nudge)
375+
nudge_handle = loop.call_later(0, nudge)
376+
346377
return future
347378

348379
def notify_connect(self, kernel_id):

0 commit comments

Comments
 (0)