Skip to content

Commit ac32225

Browse files
committed
only escalation should cause escalated to be populated in signal
1 parent a2e8cc2 commit ac32225

File tree

6 files changed

+45
-12
lines changed

6 files changed

+45
-12
lines changed

Samples/SampleMetrics/main.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,9 @@ struct MetricPrinter {
8080

8181
return .receiveMessage { _ in
8282
print("------------------------------------------------------------------------------------------")
83-
print(prom.collect())
83+
prom.collect { (stringRepr: String) in
84+
print(stringRepr)
85+
}
8486

8587
return .same
8688
}

Sources/DistributedActors/ActorShell.swift

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -319,15 +319,16 @@ internal final class ActorShell<Message>: ActorContext<Message>, AbstractActor {
319319
switch circumstances {
320320
// escalation takes precedence over death watch in terms of how we report errors
321321
case .escalating(let failure):
322+
// we only populate `escalation` if the child is escalating
322323
let terminated = Signals.ChildTerminated(address: ref.address, escalation: failure)
323-
try self.interpretChildTerminatedSignal(who: ref, terminated: terminated, escalation: true)
324+
try self.interpretChildTerminatedSignal(who: ref, terminated: terminated)
324325

325326
case .stopped:
326327
let terminated = Signals.ChildTerminated(address: ref.address, escalation: nil)
327-
try self.interpretChildTerminatedSignal(who: ref, terminated: terminated, escalation: false)
328-
case .failed(let failure):
329-
let terminated = Signals.ChildTerminated(address: ref.address, escalation: failure)
330-
try self.interpretChildTerminatedSignal(who: ref, terminated: terminated, escalation: false)
328+
try self.interpretChildTerminatedSignal(who: ref, terminated: terminated)
329+
case .failed:
330+
let terminated = Signals.ChildTerminated(address: ref.address, escalation: nil)
331+
try self.interpretChildTerminatedSignal(who: ref, terminated: terminated)
331332
}
332333

333334
case .nodeTerminated(let remoteNode):
@@ -396,10 +397,7 @@ internal final class ActorShell<Message>: ActorContext<Message>, AbstractActor {
396397
internal func fail(_ error: Error) {
397398
self._myCell.mailbox.setFailed()
398399
self.behavior = self.behavior.fail(cause: .error(error))
399-
// TODO: we could handle here "wait for children to terminate"
400400

401-
// we only finishTerminating() here and not right away in message handling in order to give the Mailbox
402-
// a chance to react to the problem as well; I.e. 1) we throw 2) mailbox sets terminating 3) we get fail() 4) we REALLY terminate
403401
switch error {
404402
case DeathPactError.unhandledDeathPact(_, _, let message):
405403
self.log.error("\(message)") // TODO: configurable logging? in props?
@@ -530,6 +528,8 @@ internal final class ActorShell<Message>: ActorContext<Message>, AbstractActor {
530528

531529
// become stopped, if not already
532530
switch self.behavior.underlying {
531+
case .failed(_, let failure):
532+
self.behavior = .stop(reason: .failure(failure))
533533
case .stop(_, let reason):
534534
self.behavior = .stop(reason: reason)
535535
default:
@@ -727,7 +727,7 @@ extension ActorShell {
727727
}
728728

729729
@inlinable
730-
internal func interpretChildTerminatedSignal(who terminatedRef: AddressableActorRef, terminated: Signals.ChildTerminated, escalation: Bool) throws {
730+
internal func interpretChildTerminatedSignal(who terminatedRef: AddressableActorRef, terminated: Signals.ChildTerminated) throws {
731731
#if SACT_TRACE_ACTOR_SHELL
732732
self.log.info("Received \(terminated)")
733733
#endif

Sources/DistributedActors/ActorSystemSettings.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ public struct ActorSystemSettings {
5555
/// - SeeAlso: `FaultSupervisionMode` for a detailed discussion of the available modes.
5656
public var faultSupervisionMode: FaultSupervisionMode = .isolateYetMayLeakMemory
5757

58+
/// Determines what action should be taken when a failure is escalated to a top level guardian (e.g. `/user` or `/system`).
5859
public var guardianFailureHandling: GuardianFailureHandling = .shutdownActorSystem
5960
}
6061

Sources/DistributedActors/ProcessIsolated/ProcessIsolated.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ extension ProcessIsolated {
312312
self.system.log.info("\(messagePrefix): RESTART, as decided by: \(restartLogic)")
313313
self.control.requestSpawnServant(supervision: servant.supervisionStrategy, args: servant.args)
314314
case .restartBackoff:
315+
self.system.log.info("\(messagePrefix): RESTART BACKOFF, as decided by: \(restartLogic)")
315316
// TODO: implement backoff for process isolated
316317
fatalError("\(messagePrefix): BACKOFF NOT IMPLEMENTED YET")
317318
}

Sources/DistributedActors/Props.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import NIO
1616

1717
// ==== ----------------------------------------------------------------------------------------------------------------
18-
// MARK: Actor Props
18+
// MARK: Props
1919

2020
/// `Props` configure an Actor's properties such as mailbox, dispatcher, as well as supervision semantics.
2121
///

Sources/DistributedActors/Signals.swift

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,42 @@ public enum Signals {
8888
}
8989

9090
/// Signal sent to a parent actor when an actor it has spawned, i.e. its child, has terminated.
91+
/// Upon processing this signal, the parent MAY choose to spawn another child with the _same_ name as the now terminated child --
92+
/// a guarantee which is not enjoyed by watching actors from any other actor.
93+
///
94+
/// This signal is sent to the parent _always_, i.e. both for the child stopping naturally as well as failing.
95+
///
96+
/// ### Death Pacts with Children
9197
///
92-
/// This signal is sent and can be handled regardless if the child was watched (using `context.watch()`) or not.
9398
/// If the child is NOT being watched by the parent, this signal will NOT cause the parent (recipient of this signal)
9499
/// to kill itself by throwing a [DeathPactError], as this is reserved only for when a death pact is formed.
95100
/// In other words, if the parent spawns child actors but does not watch them, this is taken as not caring enough about
96101
/// their lifetime as to trigger termination itself if one of them terminates.
97102
///
103+
/// ### Failure Escalation
104+
///
105+
/// It is possible, because of the special relationship parent-child actors enjoy, to spawn a child actor using the
106+
/// `.escalate` strategy, which means that if the child fails, it will populate the `escalation` failure reason of
107+
/// the `ChildTerminated` signal. Propagating failure reasons is not supported through `watch`-ed actors, and is only
108+
/// available to parent-child pairs.
109+
///
110+
/// This `escalation` failure can be used by the parent to decide if it should also fail, spawn a replacement child,
111+
/// or perform any other action, manually. Note that spawning another actor in response to `ChildTerminated` means losing
112+
/// the child's mailbox; unlike using the `.restart` supervision strategy, which keeps the mailbox, but instantiates
113+
/// a new instance of the child behavior.
114+
///
115+
/// It is NOT recommended to perform deep inspection of the escalated failure to perform complex logic, however it
116+
/// may be used to determine if a specific error is "very bad" or "not bad enough" and we should start a replacement child.
117+
///
118+
/// #### "Bubbling-up" Escalated Failures
119+
///
120+
/// Escalated failures which are not handled will cause the parent to crash as well (!).
121+
/// This enables spawning a hierarchy of actors, all of which use the `.escalate` strategy, meaning that the entire
122+
/// section of the tree will be torn down upon failure of one of the workers. A higher level supervisor may then decide to
123+
/// restart one of the higher actors, causing a "sub tree" to be restarted in response to a worker failure. Alternatively,
124+
/// this pattern is useful when one wants to bubble up failures all the way to the guardian actors (`/user`, or `/system`),
125+
/// in which case the system will issue a configured termination action (see `ActorSystemSettings.guardianFailureHandling`).
126+
///
98127
/// - Note: `ChildTerminated` IS-A `Terminated`, so unless you need to specifically react to a child terminating,
99128
/// you may choose to handle all `Terminated` signals the same way.
100129
///

0 commit comments

Comments
 (0)