Skip to content

Commit 1d306ef

Browse files
committed
Send ban per outstanding child connections
1 parent 526c526 commit 1d306ef

File tree

14 files changed

+145
-88
lines changed

14 files changed

+145
-88
lines changed

server/src/main/java/org/elasticsearch/client/node/NodeClient.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.tasks.TaskManager;
3636
import org.elasticsearch.threadpool.ThreadPool;
3737
import org.elasticsearch.transport.RemoteClusterService;
38+
import org.elasticsearch.transport.Transport;
3839

3940
import java.util.Map;
4041
import java.util.function.Supplier;
@@ -54,6 +55,7 @@ public class NodeClient extends AbstractClient {
5455
* {@link #executeLocally(ActionType, ActionRequest, TaskListener)}.
5556
*/
5657
private Supplier<String> localNodeId;
58+
private Transport.Connection localConnection;
5759
private RemoteClusterService remoteClusterService;
5860
private NamedWriteableRegistry namedWriteableRegistry;
5961

@@ -63,10 +65,12 @@ public NodeClient(Settings settings, ThreadPool threadPool) {
6365

6466
@SuppressWarnings("rawtypes")
6567
public void initialize(Map<ActionType, TransportAction> actions, TaskManager taskManager, Supplier<String> localNodeId,
66-
RemoteClusterService remoteClusterService, NamedWriteableRegistry namedWriteableRegistry) {
68+
Transport.Connection localConnection, RemoteClusterService remoteClusterService,
69+
NamedWriteableRegistry namedWriteableRegistry) {
6770
this.actions = actions;
6871
this.taskManager = taskManager;
6972
this.localNodeId = localNodeId;
73+
this.localConnection = localConnection;
7074
this.remoteClusterService = remoteClusterService;
7175
this.namedWriteableRegistry = namedWriteableRegistry;
7276
}
@@ -101,7 +105,7 @@ void doExecute(ActionType<Response> action, Request request, ActionListener<Resp
101105
public < Request extends ActionRequest,
102106
Response extends ActionResponse
103107
> Task executeLocally(ActionType<Response> action, Request request, ActionListener<Response> listener) {
104-
return taskManager.registerAndExecute("transport", transportAction(action), request,
108+
return taskManager.registerAndExecute("transport", transportAction(action), request, localConnection,
105109
(t, r) -> {
106110
try {
107111
listener.onResponse(r);
@@ -129,7 +133,7 @@ > Task executeLocally(ActionType<Response> action, Request request, ActionListen
129133
public < Request extends ActionRequest,
130134
Response extends ActionResponse
131135
> Task executeLocally(ActionType<Response> action, Request request, TaskListener<Response> listener) {
132-
return taskManager.registerAndExecute("transport", transportAction(action), request,
136+
return taskManager.registerAndExecute("transport", transportAction(action), request, localConnection,
133137
listener::onResponse, listener::onFailure);
134138
}
135139

server/src/main/java/org/elasticsearch/node/Node.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -698,11 +698,12 @@ protected Node(final Environment initialEnvironment,
698698
resourcesToClose.addAll(pluginLifecycleComponents);
699699
resourcesToClose.add(injector.getInstance(PeerRecoverySourceService.class));
700700
this.pluginLifecycleComponents = Collections.unmodifiableList(pluginLifecycleComponents);
701-
client.initialize(injector.getInstance(new Key<Map<ActionType, TransportAction>>() {}), transportService.getTaskManager(),
702-
() -> clusterService.localNode().getId(), transportService.getRemoteClusterService(),
703-
namedWriteableRegistry
704-
705-
);
701+
client.initialize(injector.getInstance(new Key<Map<ActionType, TransportAction>>() {}),
702+
transportService.getTaskManager(),
703+
() -> clusterService.localNode().getId(),
704+
transportService.getLocalNodeConnection(),
705+
transportService.getRemoteClusterService(),
706+
namedWriteableRegistry);
706707
this.namedWriteableRegistry = namedWriteableRegistry;
707708

708709
logger.debug("initializing HTTP handlers ...");

server/src/main/java/org/elasticsearch/tasks/TaskCancellationService.java

Lines changed: 63 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -28,27 +28,31 @@
2828
import org.elasticsearch.action.StepListener;
2929
import org.elasticsearch.action.support.ChannelActionListener;
3030
import org.elasticsearch.action.support.GroupedActionListener;
31-
import org.elasticsearch.cluster.node.DiscoveryNode;
3231
import org.elasticsearch.common.io.stream.StreamInput;
3332
import org.elasticsearch.common.io.stream.StreamOutput;
3433
import org.elasticsearch.threadpool.ThreadPool;
3534
import org.elasticsearch.transport.EmptyTransportResponseHandler;
35+
import org.elasticsearch.transport.Transport;
3636
import org.elasticsearch.transport.TransportChannel;
3737
import org.elasticsearch.transport.TransportException;
3838
import org.elasticsearch.transport.TransportRequest;
39+
import org.elasticsearch.transport.TransportRequestDeduplicator;
3940
import org.elasticsearch.transport.TransportRequestHandler;
41+
import org.elasticsearch.transport.TransportRequestOptions;
4042
import org.elasticsearch.transport.TransportResponse;
4143
import org.elasticsearch.transport.TransportService;
4244

4345
import java.io.IOException;
4446
import java.util.Collection;
4547
import java.util.List;
48+
import java.util.Objects;
4649

4750
public class TaskCancellationService {
4851
public static final String BAN_PARENT_ACTION_NAME = "internal:admin/tasks/ban";
4952
private static final Logger logger = LogManager.getLogger(TaskCancellationService.class);
5053
private final TransportService transportService;
5154
private final TaskManager taskManager;
55+
private final TransportRequestDeduplicator<CancelRequest> deduplicator = new TransportRequestDeduplicator<>();
5256

5357
public TaskCancellationService(TransportService transportService) {
5458
this.transportService = transportService;
@@ -61,35 +65,63 @@ private String localNodeId() {
6165
return transportService.getLocalNode().getId();
6266
}
6367

64-
void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener<Void> listener) {
68+
private static class CancelRequest {
69+
final CancellableTask task;
70+
final boolean waitForCompletion;
71+
72+
CancelRequest(CancellableTask task, boolean waitForCompletion) {
73+
this.task = task;
74+
this.waitForCompletion = waitForCompletion;
75+
}
76+
77+
@Override
78+
public boolean equals(Object o) {
79+
if (this == o) return true;
80+
if (o == null || getClass() != o.getClass()) return false;
81+
final CancelRequest that = (CancelRequest) o;
82+
return waitForCompletion == that.waitForCompletion && Objects.equals(task, that.task);
83+
}
84+
85+
@Override
86+
public int hashCode() {
87+
return Objects.hash(task, waitForCompletion);
88+
}
89+
}
90+
91+
void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener<Void> finalListener) {
92+
deduplicator.executeOnce(new CancelRequest(task, waitForCompletion), finalListener,
93+
(r, listener) -> doCancelTaskAndDescendants(task, reason, waitForCompletion, listener));
94+
}
95+
96+
void doCancelTaskAndDescendants(CancellableTask task, String reason, boolean waitForCompletion, ActionListener<Void> listener) {
6597
final TaskId taskId = task.taskInfo(localNodeId(), false).getTaskId();
6698
if (task.shouldCancelChildrenOnCancellation()) {
6799
logger.trace("cancelling task [{}] and its descendants", taskId);
68100
StepListener<Void> completedListener = new StepListener<>();
69101
GroupedActionListener<Void> groupedListener = new GroupedActionListener<>(completedListener.map(r -> null), 3);
70-
Collection<DiscoveryNode> childrenNodes = taskManager.startBanOnChildrenNodes(task.getId(), () -> {
102+
Collection<Transport.Connection> childConnections = taskManager.startBanOnChildTasks(task.getId(), () -> {
71103
logger.trace("child tasks of parent [{}] are completed", taskId);
72104
groupedListener.onResponse(null);
73105
});
74106
taskManager.cancel(task, reason, () -> {
75107
logger.trace("task [{}] is cancelled", taskId);
76108
groupedListener.onResponse(null);
77109
});
78-
StepListener<Void> banOnNodesListener = new StepListener<>();
79-
setBanOnNodes(reason, waitForCompletion, task, childrenNodes, banOnNodesListener);
80-
banOnNodesListener.whenComplete(groupedListener::onResponse, groupedListener::onFailure);
110+
StepListener<Void> setBanListener = new StepListener<>();
111+
setBanOnChildConnections(reason, waitForCompletion, task, childConnections, setBanListener);
112+
setBanListener.whenComplete(groupedListener::onResponse, groupedListener::onFailure);
81113
// If we start unbanning when the last child task completed and that child task executed with a specific user, then unban
82114
// requests are denied because internal requests can't run with a user. We need to remove bans with the current thread context.
83115
final Runnable removeBansRunnable = transportService.getThreadPool().getThreadContext()
84-
.preserveContext(() -> removeBanOnNodes(task, childrenNodes));
85-
// We remove bans after all child tasks are completed although in theory we can do it on a per-node basis.
116+
.preserveContext(() -> removeBanOnChildConnections(task, childConnections));
117+
// We remove bans after all child tasks are completed although in theory we can do it on a per-connection basis.
86118
completedListener.whenComplete(r -> removeBansRunnable.run(), e -> removeBansRunnable.run());
87-
// if wait_for_completion is true, then only return when (1) bans are placed on child nodes, (2) child tasks are
88-
// completed or failed, (3) the main task is cancelled. Otherwise, return after bans are placed on child nodes.
119+
// if wait_for_completion is true, then only return when (1) bans are placed on child connections, (2) child tasks are
120+
// completed or failed, (3) the main task is cancelled. Otherwise, return after bans are placed on child connections.
89121
if (waitForCompletion) {
90122
completedListener.whenComplete(r -> listener.onResponse(null), listener::onFailure);
91123
} else {
92-
banOnNodesListener.whenComplete(r -> listener.onResponse(null), listener::onFailure);
124+
setBanListener.whenComplete(r -> listener.onResponse(null), listener::onFailure);
93125
}
94126
} else {
95127
logger.trace("task [{}] doesn't have any children that should be cancelled", taskId);
@@ -102,47 +134,48 @@ void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitF
102134
}
103135
}
104136

105-
private void setBanOnNodes(String reason, boolean waitForCompletion, CancellableTask task,
106-
Collection<DiscoveryNode> childNodes, ActionListener<Void> listener) {
107-
if (childNodes.isEmpty()) {
137+
private void setBanOnChildConnections(String reason, boolean waitForCompletion, CancellableTask task,
138+
Collection<Transport.Connection> childConnections, ActionListener<Void> listener) {
139+
if (childConnections.isEmpty()) {
108140
listener.onResponse(null);
109141
return;
110142
}
111143
final TaskId taskId = new TaskId(localNodeId(), task.getId());
112-
logger.trace("cancelling child tasks of [{}] on child nodes {}", taskId, childNodes);
113-
GroupedActionListener<Void> groupedListener = new GroupedActionListener<>(listener.map(r -> null), childNodes.size());
144+
logger.trace("cancelling child tasks of [{}] on child connections {}", taskId, childConnections);
145+
GroupedActionListener<Void> groupedListener = new GroupedActionListener<>(listener.map(r -> null), childConnections.size());
114146
final BanParentTaskRequest banRequest = BanParentTaskRequest.createSetBanParentTaskRequest(taskId, reason, waitForCompletion);
115-
for (DiscoveryNode node : childNodes) {
116-
transportService.sendRequest(node, BAN_PARENT_ACTION_NAME, banRequest,
147+
for (Transport.Connection connection : childConnections) {
148+
transportService.sendRequest(connection, BAN_PARENT_ACTION_NAME, banRequest, TransportRequestOptions.EMPTY,
117149
new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
118150
@Override
119151
public void handleResponse(TransportResponse.Empty response) {
120-
logger.trace("sent ban for tasks with the parent [{}] to the node [{}]", taskId, node);
152+
logger.trace("sent ban for tasks with the parent [{}] for connection [{}]", taskId, connection);
121153
groupedListener.onResponse(null);
122154
}
123155

124156
@Override
125157
public void handleException(TransportException exp) {
126158
assert ExceptionsHelper.unwrapCause(exp) instanceof ElasticsearchSecurityException == false;
127-
logger.warn("Cannot send ban for tasks with the parent [{}] to the node [{}]", taskId, node);
159+
logger.warn("Cannot send ban for tasks with the parent [{}] for connection [{}]", taskId, connection);
128160
groupedListener.onFailure(exp);
129161
}
130162
});
131163
}
132164
}
133165

134-
private void removeBanOnNodes(CancellableTask task, Collection<DiscoveryNode> childNodes) {
166+
private void removeBanOnChildConnections(CancellableTask task, Collection<Transport.Connection> childConnections) {
135167
final BanParentTaskRequest request =
136168
BanParentTaskRequest.createRemoveBanParentTaskRequest(new TaskId(localNodeId(), task.getId()));
137-
for (DiscoveryNode node : childNodes) {
138-
logger.trace("Sending remove ban for tasks with the parent [{}] to the node [{}]", request.parentTaskId, node);
139-
transportService.sendRequest(node, BAN_PARENT_ACTION_NAME, request, new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
140-
@Override
141-
public void handleException(TransportException exp) {
142-
assert ExceptionsHelper.unwrapCause(exp) instanceof ElasticsearchSecurityException == false;
143-
logger.info("failed to remove the parent ban for task {} on node {}", request.parentTaskId, node);
144-
}
145-
});
169+
for (Transport.Connection connection : childConnections) {
170+
logger.trace("Sending remove ban for tasks with the parent [{}] for connection [{}]", request.parentTaskId, connection);
171+
transportService.sendRequest(connection, BAN_PARENT_ACTION_NAME, request, TransportRequestOptions.EMPTY,
172+
new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
173+
@Override
174+
public void handleException(TransportException exp) {
175+
assert ExceptionsHelper.unwrapCause(exp) instanceof ElasticsearchSecurityException == false;
176+
logger.info("failed to remove the parent ban for task {} for connection {}", request.parentTaskId, connection);
177+
}
178+
});
146179
}
147180
}
148181

0 commit comments

Comments
 (0)