@@ -139,12 +139,25 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
139139
140140 // subtractLeavingShards is passed as false here, because they still use disk space, and therefore we should be extra careful
141141 // and take the size into account
142- DiskUsage usage = getDiskUsage (node , allocation , usages , false );
142+ final DiskUsageWithRelocations usage = getDiskUsage (node , allocation , usages , false );
143143 // First, check that the node currently over the low watermark
144144 double freeDiskPercentage = usage .getFreeDiskAsPercentage ();
145145 // Cache the used disk percentage for displaying disk percentages consistent with documentation
146146 double usedDiskPercentage = usage .getUsedDiskAsPercentage ();
147147 long freeBytes = usage .getFreeBytes ();
148+ if (freeBytes < 0L ) {
149+ final long sizeOfRelocatingShards = sizeOfRelocatingShards (node , false , usage .getPath (),
150+ allocation .clusterInfo (), allocation .metaData (), allocation .routingTable ());
151+ logger .debug ("fewer free bytes remaining than the size of all incoming shards: " +
152+ "usage {} on node {} including {} bytes of relocations, preventing allocation" ,
153+ usage , node .nodeId (), sizeOfRelocatingShards );
154+
155+ return allocation .decision (Decision .NO , NAME ,
156+ "the node has fewer free bytes remaining than the total size of all incoming shards: " +
157+ "free space [%sB], relocating shards [%sB]" ,
158+ freeBytes + sizeOfRelocatingShards , sizeOfRelocatingShards );
159+ }
160+
148161 ByteSizeValue freeBytesValue = new ByteSizeValue (freeBytes );
149162 if (logger .isTraceEnabled ()) {
150163 logger .trace ("node [{}] has {}% used disk" , node .nodeId (), usedDiskPercentage );
@@ -242,6 +255,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
242255 // Secondly, check that allocating the shard to this node doesn't put it above the high watermark
243256 final long shardSize = getExpectedShardSize (shardRouting , 0L ,
244257 allocation .clusterInfo (), allocation .metaData (), allocation .routingTable ());
258+ assert shardSize >= 0 : shardSize ;
245259 double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned (usage , shardSize );
246260 long freeBytesAfterShard = freeBytes - shardSize ;
247261 if (freeBytesAfterShard < diskThresholdSettings .getFreeBytesThresholdHigh ().getBytes ()) {
@@ -268,6 +282,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
268282 diskThresholdSettings .getHighWatermarkRaw (), usedDiskThresholdHigh , freeSpaceAfterShard );
269283 }
270284
285+ assert freeBytesAfterShard >= 0 : freeBytesAfterShard ;
271286 return allocation .decision (Decision .YES , NAME ,
272287 "enough disk for shard on node, free: [%s], shard size: [%s], free after allocating shard: [%s]" ,
273288 freeBytesValue ,
@@ -289,7 +304,7 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl
289304
290305 // subtractLeavingShards is passed as true here, since this is only for shards remaining, we will *eventually* have enough disk
291306 // since shards are moving away. No new shards will be incoming since in canAllocate we pass false for this check.
292- final DiskUsage usage = getDiskUsage (node , allocation , usages , true );
307+ final DiskUsageWithRelocations usage = getDiskUsage (node , allocation , usages , true );
293308 final String dataPath = clusterInfo .getDataPath (shardRouting );
294309 // If this node is already above the high threshold, the shard cannot remain (get it off!)
295310 final double freeDiskPercentage = usage .getFreeDiskAsPercentage ();
@@ -301,6 +316,17 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl
301316 return allocation .decision (Decision .YES , NAME ,
302317 "this shard is not allocated on the most utilized disk and can remain" );
303318 }
319+ if (freeBytes < 0L ) {
320+ final long sizeOfRelocatingShards = sizeOfRelocatingShards (node , false , usage .getPath (),
321+ allocation .clusterInfo (), allocation .metaData (), allocation .routingTable ());
322+ logger .debug ("fewer free bytes remaining than the size of all incoming shards: " +
323+ "usage {} on node {} including {} bytes of relocations, shard cannot remain" ,
324+ usage , node .nodeId (), sizeOfRelocatingShards );
325+ return allocation .decision (Decision .NO , NAME ,
326+ "the shard cannot remain on this node because the node has fewer free bytes remaining than the total size of all " +
327+ "incoming shards: free space [%s], relocating shards [%s]" ,
328+ freeBytes + sizeOfRelocatingShards , sizeOfRelocatingShards );
329+ }
304330 if (freeBytes < diskThresholdSettings .getFreeBytesThresholdHigh ().getBytes ()) {
305331 if (logger .isDebugEnabled ()) {
306332 logger .debug ("less than the required {} free bytes threshold ({} bytes free) on node {}, shard cannot remain" ,
@@ -330,8 +356,8 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl
330356 "there is enough disk on this node for the shard to remain, free: [%s]" , new ByteSizeValue (freeBytes ));
331357 }
332358
333- private DiskUsage getDiskUsage (RoutingNode node , RoutingAllocation allocation ,
334- ImmutableOpenMap <String , DiskUsage > usages , boolean subtractLeavingShards ) {
359+ private DiskUsageWithRelocations getDiskUsage (RoutingNode node , RoutingAllocation allocation ,
360+ ImmutableOpenMap <String , DiskUsage > usages , boolean subtractLeavingShards ) {
335361 DiskUsage usage = usages .get (node .nodeId ());
336362 if (usage == null ) {
337363 // If there is no usage, and we have other nodes in the cluster,
@@ -341,13 +367,11 @@ private DiskUsage getDiskUsage(RoutingNode node, RoutingAllocation allocation,
341367 node .nodeId (), usage .getTotalBytes (), usage .getFreeBytes (), usage .getFreeDiskAsPercentage ());
342368 }
343369
344- final long relocatingShardsSize = sizeOfRelocatingShards (node , subtractLeavingShards , usage .getPath (),
345- allocation .clusterInfo (), allocation .metaData (), allocation .routingTable ());
346- final DiskUsage usageIncludingRelocations = new DiskUsage (node .nodeId (), node .node ().getName (), usage .getPath (),
347- usage .getTotalBytes (), usage .getFreeBytes () - relocatingShardsSize );
348- logger .trace ("getDiskUsage: usage [{}] with [{}] bytes relocating yields [{}]" ,
349- usage , relocatingShardsSize , usageIncludingRelocations );
350- return usageIncludingRelocations ;
370+ final DiskUsageWithRelocations diskUsageWithRelocations = new DiskUsageWithRelocations (usage ,
371+ sizeOfRelocatingShards (node , subtractLeavingShards , usage .getPath (),
372+ allocation .clusterInfo (), allocation .metaData (), allocation .routingTable ()));
373+ logger .trace ("getDiskUsage(subtractLeavingShards={}) returning {}" , subtractLeavingShards , diskUsageWithRelocations );
374+ return diskUsageWithRelocations ;
351375 }
352376
353377 /**
@@ -377,7 +401,7 @@ DiskUsage averageUsage(RoutingNode node, ImmutableOpenMap<String, DiskUsage> usa
377401 * @param shardSize Size in bytes of the shard
378402 * @return Percentage of free space after the shard is assigned to the node
379403 */
380- double freeDiskPercentageAfterShardAssigned (DiskUsage usage , Long shardSize ) {
404+ double freeDiskPercentageAfterShardAssigned (DiskUsageWithRelocations usage , Long shardSize ) {
381405 shardSize = (shardSize == null ) ? 0 : shardSize ;
382406 DiskUsage newUsage = new DiskUsage (usage .getNodeId (), usage .getNodeName (), usage .getPath (),
383407 usage .getTotalBytes (), usage .getFreeBytes () - shardSize );
@@ -445,4 +469,59 @@ public static long getExpectedShardSize(ShardRouting shard, long defaultValue, C
445469 return clusterInfo .getShardSize (shard , defaultValue );
446470 }
447471 }
472+
473+ static class DiskUsageWithRelocations {
474+
475+ private final DiskUsage diskUsage ;
476+ private final long relocatingShardSize ;
477+
478+ DiskUsageWithRelocations (DiskUsage diskUsage , long relocatingShardSize ) {
479+ this .diskUsage = diskUsage ;
480+ this .relocatingShardSize = relocatingShardSize ;
481+ }
482+
483+ @ Override
484+ public String toString () {
485+ return "DiskUsageWithRelocations{" +
486+ "diskUsage=" + diskUsage +
487+ ", relocatingShardSize=" + relocatingShardSize +
488+ '}' ;
489+ }
490+
491+ double getFreeDiskAsPercentage () {
492+ if (getTotalBytes () == 0L ) {
493+ return 100.0 ;
494+ }
495+ return 100.0 * ((double )getFreeBytes () / getTotalBytes ());
496+ }
497+
498+ double getUsedDiskAsPercentage () {
499+ return 100.0 - getFreeDiskAsPercentage ();
500+ }
501+
502+ long getFreeBytes () {
503+ try {
504+ return Math .subtractExact (diskUsage .getFreeBytes (), relocatingShardSize );
505+ } catch (ArithmeticException e ) {
506+ return Long .MAX_VALUE ;
507+ }
508+ }
509+
510+ String getPath () {
511+ return diskUsage .getPath ();
512+ }
513+
514+ String getNodeId () {
515+ return diskUsage .getNodeId ();
516+ }
517+
518+ String getNodeName () {
519+ return diskUsage .getNodeName ();
520+ }
521+
522+ long getTotalBytes () {
523+ return diskUsage .getTotalBytes ();
524+ }
525+ }
526+
448527}
0 commit comments