@@ -22,11 +22,25 @@ message CoordinationServiceConfig {
2222 // Maximum wait time for all members in the cluster to be registered.
2323 int64 cluster_register_timeout_in_ms = 4 ;
2424
25- // Heartbeat timeout, if a worker does not record heartbeat in this time
25+ // Heartbeat timeout, if a task does not record heartbeat in this time
2626 // window, it will be considered disconnected.
27+ // Note: This is also used as a grace period to accept any heartbeats after
28+ // the agent has disconnected, to account for the lag time between the service
29+ // recording the state change and the agent stopping heartbeats.
2730 int64 heartbeat_timeout_in_ms = 5 ;
2831
2932 // The list of jobs that participate in the coordination service. If empty, all
3033 // jobs will be included in the coordination service by default.
3134 repeated string coordinated_jobs = 6 ;
35+
36+ // Denotes how long to wait for all coordination agents to reach the barriers
37+ // (after the first shutdown request) before disconnecting together. If
38+ // set to 0, no barrier is imposed upon shutdown and each worker can
39+ // disconnect individually.
40+ int64 shutdown_barrier_timeout_in_ms = 7 ;
41+
42+ // If set, agents do not make an explicit Shutdown() call. Service will only
43+ // find out about the disconnected agent via stale heartbeats. Used for
44+ // testing.
45+ bool agent_destruction_without_shutdown = 8 ;
3246}
0 commit comments