cluster #55 #377 #383 #378 OnDownActions, harden singleton & Downing tests, fix TimeoutDowningStrategy #375
Changes from all commits
e5731b1
1e8d059
2d9ea0c
1ffcb87
fcc4aef
3772884
e74b91a
48bb4c1
8541ba6
32279a4
8d7ddf1
73f6951
```diff
@@ -83,6 +83,10 @@ public struct ActorLogger {
     }

+    public static func make(system: ActorSystem, identifier: String? = nil) -> Logger {
+        if let overriddenLoggerFactory = system.settings.overrideLoggerFactory {
+            return overriddenLoggerFactory(identifier ?? system.name)
+        }

         // we need to add our own storage, and can't do so to Logger since it is a struct...
         // so we need to make such a "proxy log handler" that does our actor-specific things.
         var proxyHandler = ActorOriginLogHandler(system)
```

**Member (Author):** This was missing, and thus we were not capturing logs at the system level.
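For context, the new `overrideLoggerFactory` hook lets users route all system-created loggers, including system-level ones, through their own factory. A minimal sketch of how this could be configured; the bootstrap-closure shape and module name are assumptions based on the surrounding codebase, not confirmed by this diff:

```swift
import DistributedActors // module name assumed
import Logging

let system = ActorSystem("LogCapture") { settings in
    // Assumed settings property from this diff, of type (String) -> Logger:
    settings.overrideLoggerFactory = { identifier in
        var log = Logger(label: identifier) // `identifier` defaults to the system name
        log.logLevel = .trace               // capture everything, including system internals
        return log
    }
}
```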
```diff
@@ -98,7 +98,7 @@ public final class ActorSystem {
     // MARK: Logging

     public var log: Logger {
-        var l = ActorLogger.make(system: self) // we only do this to go "through" the proxy; we may not need it in the future?
+        var l = ActorLogger.make(system: self)
         l.logLevel = self.settings.defaultLogLevel
         return l
     }
```

**Member (Author):** This works now.
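Since `log` is a computed property, each access picks up `settings.defaultLogLevel`; a quick sketch, assuming `defaultLogLevel` is user-settable, as its use here implies:

```swift
let quiet = ActorSystem("Quiet") { settings in
    settings.defaultLogLevel = .warning // every `quiet.log` access starts at .warning
}
quiet.log.debug("dropped -- below the default level")
quiet.log.warning("emitted")
```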
```diff
@@ -337,7 +337,7 @@ public final class ActorSystem {
         self.settings.plugins.stopAll(self)

         DispatchQueue.global().async {
-            self.log.log(level: .debug, "SHUTTING DOWN ACTOR SYSTEM [\(self.name)]. All actors will be stopped.", file: #file, function: #function, line: #line)
+            self.log.log(level: .debug, "Shutting down actor system [\(self.name)]. All actors will be stopped.", file: #file, function: #function, line: #line)
             if let cluster = self._cluster {
                 let receptacle = BlockingReceptacle<Void>()
                 cluster.ref.tell(.command(.shutdown(receptacle))) // FIXME: should be shutdown
```
```diff
@@ -47,8 +47,16 @@ public struct ClusterControl {
         self.ref.tell(.command(.initJoin(node)))
     }

+    public func leave() {
+        self.ref.tell(.command(.downCommand(self.node.node)))
+    }
+
     /// Mark as `Cluster.MemberStatus.down` _any_ incarnation of a member matching the passed in `node`.
     public func down(node: Node) {
         self.ref.tell(.command(.downCommand(node)))
     }
+
+    public func down(member: Cluster.Member) {
+        self.ref.tell(.command(.downCommandMember(member)))
+    }
 }
```

**Member (Author):** `leave()` may get its own command eventually; we have a `.leaving` status in membership. The idea is that while leaving we may still perform actions, but others would not give us new work, etc.

**Member (Author):** The `down(member:)` variant is interesting / important: by using this version internally, we correctly carry all the metadata a member has -- such as its reachability at the moment someone decided to call down. Mostly a "more correct view of the cluster membership" change; for end users, calling either of them will yield the expected result.
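As a usage sketch, both downing entry points are reachable through the system's cluster control; the `system.cluster` accessor and the `Node` initializer shape are assumptions based on the surrounding codebase:

```swift
// Currently implemented as downing ourselves; see the comment above.
system.cluster.leave()

// By bare node address: _any_ incarnation of a member matching this node is downed.
system.cluster.down(node: Node(systemName: "System", host: "127.0.0.1", port: 8228))

// When a full Cluster.Member is at hand, prefer this variant, since it carries
// the member's metadata (e.g. reachability) at the moment of the decision:
// system.cluster.down(member: someMember)
```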
```diff
@@ -226,7 +226,10 @@ internal enum ClusterReceptionist {
     }

     private static func syncRegistrations(context: ActorContext<Receptionist.Message>, myself: ActorRef<ClusterReceptionist.FullState>) throws {
-        let remoteControls = context.system._cluster!.associationRemoteControls // FIXME: should not be needed and use cluster members instead
+        guard let cluster = context.system._cluster else { // FIXME: should not be needed and use cluster members instead
+            return // cannot get _cluster, perhaps we are shutting down already?
+        }
+        let remoteControls = cluster.associationRemoteControls

         guard !remoteControls.isEmpty else {
             return // nothing to do, no remote members
```
```diff
@@ -249,6 +252,64 @@
     }

     private static func makeRemoteAddress(on node: UniqueNode) -> ActorAddress {
-        return try! .init(node: node, path: ActorPath([ActorPathSegment("system"), ActorPathSegment("receptionist")]), incarnation: .wellKnown)
+        try! .init(node: node, path: ActorPath([ActorPathSegment("system"), ActorPathSegment("receptionist")]), incarnation: .wellKnown) // try! safe, we know the path is legal
     }
 }

+// ==== ----------------------------------------------------------------------------------------------------------------
+// MARK: DowningStrategySettings
+
+public enum DowningStrategySettings {
+    case none
+    case timeout(TimeoutBasedDowningStrategySettings)
+
+    func make(_ clusterSettings: ClusterSettings) -> DowningStrategy? {
+        switch self {
+        case .none:
+            return nil
+        case .timeout(let settings):
+            return TimeoutBasedDowningStrategy(settings, selfNode: clusterSettings.uniqueBindNode)
+        }
+    }
+}
+
+// ==== ----------------------------------------------------------------------------------------------------------------
+// MARK: OnDownActionStrategySettings
+
+public enum OnDownActionStrategySettings {
+    /// Take no (automatic) action upon noticing that this member is marked as [.down].
+    ///
+    /// When using this mode you should take special care to implement some form of shutting down of this node (!).
+    /// As a `Cluster.MemberStatus.down` node is effectively useless for the rest of the cluster -- i.e. other
+    /// members MUST refuse communication with this down node.
+    case none
+    /// Upon noticing that this member is marked as [.down], initiate a shutdown.
+    case gracefulShutdown(delay: TimeAmount)
+
+    func make() -> (ActorSystem) throws -> Void {
+        switch self {
+        case .none:
+            return { _ in () } // do nothing
+
+        case .gracefulShutdown(let shutdownDelay):
+            return { system in
+                _ = try system.spawn("leaver", of: String.self, .setup { context in
+                    guard .milliseconds(0) < shutdownDelay else {
+                        context.log.warning("This node was marked as [.down], delay is immediate. Shutting down the system immediately!")
+                        system.shutdown()
+                        return .stop
+                    }
+
+                    context.timers.startSingle(key: "shutdown-delay", message: "shutdown", delay: shutdownDelay)
+                    system.log.warning("This node was marked as [.down], performing OnDownAction as configured: shutting down the system, in \(shutdownDelay)")
+
+                    return .receiveMessage { _ in
+                        system.log.warning("Shutting down...")
+                        system.shutdown()
+                        return .stop
+                    }
+                })
+            }
+        }
+    }
+}
```

**Member (Author):** A slight delay is useful to allow the `.down` gossip to spread to the others before we really die. Tests also cover the "shutdown immediately" case, which works correctly 👍
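Putting the two new settings together, opting in at bootstrap could look like the following sketch; the `settings.cluster.*` paths follow the `ClusterSettings` changes below, while the `enabled` flag is an assumption:

```swift
let system = ActorSystem("Example") { settings in
    settings.cluster.enabled = true // assumed flag to turn clustering on
    settings.cluster.downingStrategy = .timeout(.default)
    settings.cluster.onDownAction = .gracefulShutdown(delay: .seconds(3))
}
```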
```diff
@@ -110,7 +110,7 @@ public struct ClusterSettings {
     // ==== ------------------------------------------------------------------------------------------------------------
     // MARK: Leader Election

-    public var autoLeaderElection: LeadershipSelectionSettings = .lowestAddress(minNumberOfMembers: 2)
+    public var autoLeaderElection: LeadershipSelectionSettings = .lowestReachable(minNumberOfMembers: 2)

     // ==== ------------------------------------------------------------------------------------------------------------
     // MARK: TLS & Security settings
```
```diff
@@ -143,7 +143,14 @@ public struct ClusterSettings {
     // ==== ----------------------------------------------------------------------------------------------------------------
     // MARK: Cluster membership and failure detection

-    public var downingStrategy: DowningStrategySettings = .none
+    /// Strategy deciding how members determine whether others (or this node itself) shall be marked as `.down`.
+    /// This strategy should be set to the same (or a compatible) strategy on all members of a cluster to avoid split brain situations.
+    public var downingStrategy: DowningStrategySettings = .timeout(.default)
+
+    /// When this member node notices it has been marked as `.down` in the membership, it can automatically perform an action.
+    /// This setting determines which action to take. Generally speaking, the best course of action is to quickly and gracefully
+    /// shut down the node and process, potentially leaving a higher level orchestrator to replace the node (e.g. k8s starting a new pod for the cluster).
+    public var onDownAction: OnDownActionStrategySettings = .gracefulShutdown(delay: .seconds(3))

     /// Configures the SWIM failure detector.
     public var swim: SWIM.Settings = .default
```

**Member:** +1
```diff
@@ -168,17 +175,3 @@ public struct ClusterSettings {
         self.tls = tls
     }
 }
-
-public enum DowningStrategySettings {
-    case none
-    case timeout(TimeoutBasedDowningStrategySettings)
-
-    func make(_ clusterSettings: ClusterSettings) -> DowningStrategy? {
-        switch self {
-        case .none:
-            return nil
-        case .timeout(let settings):
-            return TimeoutBasedDowningStrategy(settings, selfNode: clusterSettings.uniqueBindNode)
-        }
-    }
-}
```
**Member:** 👍
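Finally, since `.timeout(.default)` is now the default downing strategy, the likely knob to tune is the unreachability timeout. A sketch; note that `downUnreachableMembersAfter` is a guessed property name on `TimeoutBasedDowningStrategySettings`, not confirmed by this diff:

```swift
let system = ActorSystem("Tuned") { settings in
    var downing = TimeoutBasedDowningStrategySettings.default
    downing.downUnreachableMembersAfter = .seconds(5) // hypothetical property name
    settings.cluster.downingStrategy = .timeout(downing)
}
```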