-
Notifications
You must be signed in to change notification settings - Fork 79
Remote Association creation revamp, removal of hack-workaround in RemoteRef #577
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d82c40e
0195ee8
0c66985
24c22a0
74e8190
6c1a39f
ab2a941
d268d15
674b1f2
8e1454e
f3e5f72
639cc2a
805fec7
78b3d4c
a26a8a6
9e74a11
423f60b
32a5516
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,27 +32,27 @@ stdbuf -i0 -o0 -e0 swift run it_Clustered_swim_suspension_reachability 7337 > ${ | |
| declare -r first_pid=$(echo $!) | ||
| wait_log_exists ${first_logs} 'Binding to: ' 200 # since it might be compiling again... | ||
|
|
||
| stdbuf -i0 -o0 -e0 swift run it_Clustered_swim_suspension_reachability 8228 localhost 7337 > ${second_logs} 2>&1 & | ||
| stdbuf -i0 -o0 -e0 swift run it_Clustered_swim_suspension_reachability 8228 127.0.0.1 7337 > ${second_logs} 2>&1 & | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Changed the defaults everywhere; I was getting confused by the mix of localhost and 127.0.0.1. Now it's 127.0.0.1 everywhere by default. |
||
| declare -r second_pid=$(echo $!) | ||
| wait_log_exists ${second_logs} 'Binding to: ' 200 # since it might be compiling again... | ||
|
|
||
| echo "Waiting nodes to become .up..." | ||
| wait_log_exists ${first_logs} 'membershipChange(sact://System@localhost:8228 :: \[joining\] -> \[ up\])' 40 | ||
| wait_log_exists ${first_logs} 'membershipChange(sact://System@127.0.0.1:8228 :: \[joining\] -> \[ up\])' 50 | ||
| echo 'Second member seen .up, good...' | ||
|
|
||
| # suspend the second process, causing unreachability | ||
| kill -SIGSTOP ${second_pid} | ||
| jobs | ||
|
|
||
| wait_log_exists ${first_logs} 'reachabilityChange(DistributedActors.Cluster.ReachabilityChange.*localhost:8228, status: up, reachability: unreachable' 40 | ||
| wait_log_exists ${first_logs} 'reachabilityChange(DistributedActors.Cluster.ReachabilityChange.*127.0.0.1:8228, status: up, reachability: unreachable' 50 | ||
| echo 'Second member seen .unreachable, good...' | ||
|
|
||
| # resume it in the background | ||
| kill -SIGCONT ${second_pid} | ||
|
|
||
| # it should become reachable again | ||
| declare -r expected_second_member_unreachable= | ||
| wait_log_exists ${first_logs} 'reachabilityChange(DistributedActors.Cluster.ReachabilityChange.*localhost:8228, status: up, reachability: reachable' 40 | ||
| wait_log_exists ${first_logs} 'reachabilityChange(DistributedActors.Cluster.ReachabilityChange.*127.0.0.1:8228, status: up, reachability: reachable' 50 | ||
| echo 'Second member seen .unreachable, good...' | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,8 +25,8 @@ import "Serialization/Serialization.proto"; | |
|
|
||
| message HandshakeOffer { | ||
| ProtocolVersion version = 1; | ||
| UniqueNode from = 2; | ||
| Node to = 3; | ||
| UniqueNode originNode = 2; | ||
| Node targetNode = 3; | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. one of the first things / types I ever did for this project 😉
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just curious--How come
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's on purpose -- in say a user writes // If we discovered the node using gossip or an actor ref indicating an actor on the remote node we would indeed know the NID though; but the handshake is extended to "some node on that address". If we end up associating and that NID is different than the uniqueNode used by some actor ref, it means that other unique node is likely "wrong" in the sense that it likely was a previous actor system instance on the same host:port, but it's a new instance/process. Such messages would end up being deadlettered.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll add a specific test for this: Test: Hold "old ref" while new associated node on same host:port is associated #603 |
||
| // In the future we may want to add additional information | ||
| // about certain capabilities here. E.g. when a node supports | ||
| // faster transport like InfiniBand and the likes, so we can | ||
|
|
@@ -43,16 +43,14 @@ message HandshakeResponse { | |
|
|
||
| message HandshakeAccept { | ||
| ProtocolVersion version = 1; | ||
| UniqueNode origin = 2; | ||
| UniqueNode from = 3; | ||
| UniqueNode originNode = 2; | ||
| UniqueNode targetNode = 3; | ||
| } | ||
|
|
||
| message HandshakeReject { | ||
| ProtocolVersion version = 1; | ||
| UniqueNode origin = 2; | ||
| // In the reject case this is an `Node` instead of a `UniqueNode`, | ||
| // to explicitly prevent this from forming an association. | ||
| Node from = 3; | ||
| ProtocolVersion version = 1; | ||
| UniqueNode originNode = 2; | ||
| UniqueNode targetNode = 3; | ||
| string reason = 4; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,4 +106,4 @@ if system.cluster.node.port == 7337 { // <2> | |
|
|
||
| // end::cluster-sample-actors-discover-and-chat[] | ||
|
|
||
| system.park(atMost: .seconds(60)) | ||
| system.park(atMost: .seconds(6000)) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Basically keep them around "forever" when testing and looking at logs ;) |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -635,16 +635,6 @@ public final class ActorShell<Message: ActorMessage>: ActorContext<Message>, Abs | |
| return try self._spawn(naming, props: props, behavior) | ||
| } | ||
|
|
||
| // public override func spawn<M>( | ||
| // _ naming: ActorNaming, of type: M.Type = M.self, props: Props = Props(), | ||
| // file: String = #file, line: UInt = #line, | ||
| // _ behavior: Behavior<M> | ||
| // ) throws -> ActorRef<M> | ||
| // where M: ActorMessage { | ||
| // try self.system.serialization._ensureCodableSerializer(type, file: file, line: line) | ||
| // return try self._spawn(naming, props: props, behavior) | ||
| // } | ||
|
|
||
| public override func spawnWatch<Message>( | ||
| _ naming: ActorNaming, of type: Message.Type = Message.self, props: Props, | ||
| file: String = #file, line: UInt = #line, | ||
|
|
@@ -654,15 +644,6 @@ public final class ActorShell<Message: ActorMessage>: ActorContext<Message>, Abs | |
| self.watch(try self.spawn(naming, props: props, behavior)) | ||
| } | ||
|
|
||
| // public override func spawnWatch<Message>( | ||
| // _ naming: ActorNaming, of type: Message.Type = Message.self, props: Props, | ||
| // file: String = #file, line: UInt = #line, | ||
| // _ behavior: Behavior<Message> | ||
| // ) throws -> ActorRef<Message> | ||
| // where Message: ActorMessage { | ||
| // self.watch(try self.spawn(naming, props: props, behavior)) | ||
| // } | ||
|
|
||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cleanup |
||
| public override func stop<Message: ActorMessage>(child ref: ActorRef<Message>) throws { | ||
| try self._stop(child: ref) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -483,7 +483,12 @@ public extension Behavior { | |
| case .suspend: | ||
| fatalError("Illegal to attempt to interpret message with .suspend behavior! Behavior should have been canonicalized. This is a bug, please open a ticket.", file: file, line: line) | ||
| case .suspended: | ||
| fatalError("No message should ever be delivered to a .suspended behavior! This is a bug, please open a ticket.", file: file, line: line) | ||
| fatalError(""" | ||
| No message should ever be delivered to a .suspended behavior! | ||
| Message: \(message) | ||
| Actor: \(context) | ||
| This is a bug, please open a ticket. | ||
| """, file: file, line: line) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. More details in case this happens again, the previous crash would not be easy to follow up on |
||
| } | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This really is an acknowledgement of how we now use SWIM; it IS our failure detector.
It used to be both the only membership we had and the failure detector.
Now we're more true to how we use it in the settings.