2 changes: 1 addition & 1 deletion Docs/SWIM.adoc
@@ -1,4 +1,4 @@

[[SWIM]]
== SWIM Membership Protocol

Swift Distributed Actors implements a variant of the https://www.cs.cornell.edu/projects/Quicksilver/public_pdfs/SWIM.pdf[SWIM Membership Protocol]
2 changes: 2 additions & 0 deletions Docs/actors.adoc
@@ -448,6 +448,8 @@ section of the guide), or its `dispatcher`.
[[suggested_props_pattern]]
==== Suggested Props Pattern

#TODO: deprecate this and replace with "shell" pattern I guess?#

Sometimes when implementing behaviors which may be spawned by other users, it may be useful to centralize the props creation
along with its default "suggested" settings. The _Suggested Props_ pattern describes a common style of solving this.

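For illustration, a minimal sketch of what the pattern tends to look like (the `Greeter` type, its message type, and its settings are made up for this example; only the overall shape matters):

[source,swift]
----
enum Greeter {
    // The behavior other users are expected to spawn.
    static var behavior: Behavior<String> {
        return .receiveMessage { name in
            // ... greet `name` ...
            return .same
        }
    }

    // The "suggested" props, centralizing the defaults recommended by the behavior's author,
    // e.g. supervision or dispatcher settings.
    static var suggestedProps: Props {
        return Props()
    }
}

// Callers then spawn using the suggested defaults:
// let greeter = try system.spawn("greeter", of: String.self, props: Greeter.suggestedProps, Greeter.behavior)
----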
2 changes: 1 addition & 1 deletion Docs/examples.adoc
@@ -5,7 +5,7 @@

This section contains links and explanations of simple and more advanced examples.

INFO: Contributions are very welcome, please reach out if you'd like to show off an example app you have built.
TIP: Contributions are very welcome, please reach out if you'd like to show off an example app you have built.

=== Dining Philosophers

482 changes: 276 additions & 206 deletions Docs/failure_handling.adoc

Large diffs are not rendered by default.

Binary file added Docs/images/actor_tree.graffle/data.plist
Binary file not shown.
Binary file added Docs/images/actor_tree.graffle/image1.pdf
Binary file not shown.
Binary file added Docs/images/actor_tree.png
Binary file not shown.
Binary file not shown.
Binary file added Docs/images/process_isolated_servants.png
2 changes: 1 addition & 1 deletion Docs/serialization.adoc
@@ -1,4 +1,4 @@

[[serialization]]
== Serialization

> Swift Distributed Actors offers a serialization layer which allows you to decouple where messages are sent and received,
@@ -0,0 +1,70 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Distributed Actors open source project
//
// Copyright (c) 2018-2019 Apple Inc. and the Swift Distributed Actors project authors
// Licensed under Apache License v2.0
//
// See LICENSE.txt for license information
// See CONTRIBUTORS.md for the list of Swift Distributed Actors project authors
//
// SPDX-License-Identifier: Apache-2.0
//
//===----------------------------------------------------------------------===//

#if os(OSX)
import Darwin.C
#else
import Glibc
#endif

import DistributedActors

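// Boot the process-isolated actor system; the same binary runs as the master and as its servant processes.
// On servant processes a guardian failure exits the whole process, so the master's supervision can respawn it.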
let isolated = ProcessIsolated { boot in
boot.settings.defaultLogLevel = .info
boot.runOn(role: .servant) {
boot.settings.failure.onGuardianFailure = .systemExit(-1)
}
return ActorSystem(settings: boot.settings)
}

pprint("Started process: \(getpid()) with roles: \(isolated.roles)")

struct OnPurposeBoom: Error {}

isolated.run(on: .master) {
isolated.spawnServantProcess(supervision:
.respawn(
atMost: 5, within: nil,
backoff: Backoff.exponential(
initialInterval: .milliseconds(100),
multiplier: 1.5,
randomFactor: 0
)
)
)
}

try isolated.run(on: .servant) {
isolated.system.log.info("ISOLATED RUNNING: \(CommandLine.arguments)")

_ = try isolated.system.spawn("failed", of: String.self,
props: Props().supervision(strategy: .escalate),
.setup { context in
context.log.info("Spawned \(context.path) on servant node it will fail soon...")
context.timers.startSingle(key: "explode", message: "Boom", delay: .seconds(1))

return .receiveMessage { message in
context.log.error("Time to crash with: fatalError")
// crashes process since we do not isolate faults
fatalError("FATAL ERROR ON PURPOSE")
}
})
}

// finally, once everything is prepared, you have to invoke the following,
// which will BLOCK on the master process and use the main thread to
// process any incoming process commands (e.g. spawn another servant)
isolated.blockAndSuperviseServants()

// ~~~ unreachable ~~~
@@ -0,0 +1,73 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Distributed Actors open source project
//
// Copyright (c) 2018-2019 Apple Inc. and the Swift Distributed Actors project authors
// Licensed under Apache License v2.0
//
// See LICENSE.txt for license information
// See CONTRIBUTORS.md for the list of Swift Distributed Actors project authors
//
// SPDX-License-Identifier: Apache-2.0
//
//===----------------------------------------------------------------------===//

#if os(OSX)
import Darwin.C
#else
import Glibc
#endif

import DistributedActors

let isolated = ProcessIsolated { boot in
boot.settings.defaultLogLevel = .info
boot.runOn(role: .servant) {
boot.settings.failure.onGuardianFailure = .systemExit(-1)
}
return ActorSystem(settings: boot.settings)
}

pprint("Started process: \(getpid()) with roles: \(isolated.roles)")

struct OnPurposeBoom: Error {}

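// On the master, spawn two servant processes (each respawned at most once); the argument tells each servant how it should fail.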
isolated.run(on: .master) {
isolated.spawnServantProcess(supervision: .respawn(atMost: 1, within: nil), args: ["fatalError"])
isolated.spawnServantProcess(supervision: .respawn(atMost: 1, within: nil), args: ["escalateError"])
}

try isolated.run(on: .servant) {
isolated.system.log.info("ISOLATED RUNNING: \(CommandLine.arguments)")

// TODO: assert command line arguments are the expected ones

_ = try isolated.system.spawn("failed", of: String.self,
props: Props().supervision(strategy: .escalate),
.setup { context in
context.log.info("Spawned \(context.path) on servant node it will fail soon...")
context.timers.startSingle(key: "explode", message: "Boom", delay: .seconds(1))

return .receiveMessage { message in
if CommandLine.arguments.contains("fatalError") {
context.log.error("Time to crash with: fatalError")
// crashes process since we do not isolate faults
fatalError("FATAL ERROR ON PURPOSE")
} else if CommandLine.arguments.contains("escalateError") {
context.log.error("Time to crash with: throwing an error, escalated to top level")
// since we .escalate and are a top-level actor, this will cause the process to die as well
throw OnPurposeBoom()
} else {
context.log.error("MISSING FAILURE MODE ARGUMENT!!! Test is constructed not properly, or arguments were not passed properly. \(CommandLine.arguments)")
fatalError("MISSING FAILURE MODE ARGUMENT!!! Test is constructed not properly, or arguments were not passed properly. \(CommandLine.arguments)")
}
}
})
}

// finally, once everything is prepared, you have to invoke the following,
// which will BLOCK on the master process and use the main thread to
// process any incoming process commands (e.g. spawn another servant)
isolated.blockAndSuperviseServants()
Member Author: hoping to get rid of this part eventually, but for now good enough 🤔


// ~~~ unreachable ~~~
@@ -31,10 +31,8 @@ let isolated = ProcessIsolated { boot in

pprint("Started process: \(getpid()) with roles: \(isolated.roles)")

let workersKey = Receptionist.RegistrationKey(String.self, id: "workers")

// though one can ensure to only run if in a process of a given role:
try isolated.run(on: .master) {
isolated.run(on: .master) {
// open some fds, hope to not leak them into children!
var fds: [Int] = []
for i in 1 ... 1000 {
@@ -45,7 +43,7 @@

/// spawn a servant

isolated.spawnServantProcess(supervision: .restart(atMost: 100, within: .seconds(1)), args: ["ALPHA"])
isolated.spawnServantProcess(supervision: .respawn(atMost: 100, within: .seconds(1)), args: ["ALPHA"])
}

// finally, once prepared, you have to invoke the following:
@@ -49,7 +49,7 @@ try isolated.run(on: .master) {
})

// should we allow anyone to issue this, or only on master? we could `runOnMaster { control` etc
isolated.spawnServantProcess(supervision: .restart(atMost: 100, within: .seconds(1)), args: ["ALPHA"])
isolated.spawnServantProcess(supervision: .respawn(atMost: 100, within: .seconds(1)), args: ["ALPHA"])
}

// Notice that master has no workers, just the pool...
7 changes: 7 additions & 0 deletions IntegrationTests/tests_02_process_isolated/shared.sh
@@ -13,6 +13,13 @@
##
##===----------------------------------------------------------------------===##

RED='\033[0;31m'
RST='\033[0m'

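# echoerr: print its arguments to stderr in red, so failures stand out in the test output.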
function echoerr() {
echo "${RED}$@${RST}" 1>&2;
}

function _killall() {
set +e
local killall_app_name="$1"
@@ -58,7 +58,8 @@ await_n_processes "$app_name" 2

if [[ $(ps aux | awk '{print $2}' | grep ${pid_servant} | grep -v 'grep' | wc -l) -ne 0 ]]; then
echo "ERROR: Seems the servant was not killed!!!"
exit -2
_killall ${app_name}
exit -1
fi

await_n_processes "$app_name" 2
@@ -16,9 +16,6 @@
set -e
#set -x # verbose

declare -r RED='\033[0;31m'
declare -r RST='\033[0m'

declare -r my_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
declare -r root_path="$my_path/.."

@@ -52,7 +49,9 @@ for pid_servant in $pid_servants; do
if [[ $(lsof -p $pid_servant | wc -l) -gt 100 ]]; then
lsof -p $pid_servant
printf "${RED}ERROR: Seems the servant [${pid_servant}] has too many FDs open, did the master's FDs leak?${RST}\n"
exit -2

_killall ${app_name}
exit -1
fi
done

@@ -0,0 +1,84 @@
#!/bin/bash
##===----------------------------------------------------------------------===##
##
## This source file is part of the Swift Distributed Actors open source project
##
## Copyright (c) 2018-2019 Apple Inc. and the Swift Distributed Actors project authors
## Licensed under Apache License v2.0
##
## See LICENSE.txt for license information
## See CONTRIBUTORS.md for the list of Swift Distributed Actors project authors
##
## SPDX-License-Identifier: Apache-2.0
##
##===----------------------------------------------------------------------===##

set -e
#set -x # verbose

declare -r my_path="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
declare -r root_path="$my_path/.."

declare -r app_name='it_ProcessIsolated_escalatingWorkers'

cd ${root_path}

source ${my_path}/shared.sh

_killall ${app_name}

# ====------------------------------------------------------------------------------------------------------------------
# MARK: the app has workers which fail so hard that the failures reach the top level actors which then terminate the system
# when the system terminates, we kill the process; once the process terminates, the servant supervision kicks in and
# restarts the entire process; layered supervision for the win!

swift build # synchronously ensure built

declare -r log_file="/tmp/${app_name}.log"
rm -f ${log_file}
swift run ${app_name} > ${log_file} &

declare -r supervision_respawn_grep_txt='supervision: RESPAWN'
declare -r supervision_stop_grep_txt='supervision: STOP'

# we want to wait until 2 STOPs are found in the logs; then we can check if the other conditions are as we expect
echo "Waiting for the servants to RESPAWN and then STOP..."
spin=1 # spin counter
max_spins=20
while [[ $(cat ${log_file} | grep "${supervision_stop_grep_txt}" | wc -l) -ne 2 ]]; do
sleep 1
spin=$((spin+1))
if [[ ${spin} -eq ${max_spins} ]]; then
echoerr "Never saw enough '${supervision_stop_grep_txt}' in logs."
cat ${log_file}
exit -1
fi
done

echo '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
cat ${log_file} | grep "${supervision_respawn_grep_txt}"
echo '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'

echo '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
cat ${log_file} | grep "${supervision_stop_grep_txt}"
echo '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'

if [[ $(cat ${log_file} | grep "${supervision_respawn_grep_txt}" | wc -l) -ne 2 ]]; then
echoerr "ERROR: We expected 2 servants to only respawn once, yet other number of respawns was detected!"
cat ${log_file}

_killall ${app_name}
exit -1
fi

if [[ $(cat ${log_file} | grep "${supervision_stop_grep_txt}" | wc -l) -ne 2 ]]; then
echoerr "ERROR: Expected the servants to STOP after they are replaced once!"
cat ${log_file}

_killall ${app_name}
exit -2
fi

# === cleanup ----------------------------------------------------------------------------------------------------------

_killall ${app_name}