1919
2020package org .elasticsearch .cluster .routing .allocation .decider ;
2121
22- import java .util .Set ;
23-
2422import com .carrotsearch .hppc .cursors .ObjectCursor ;
2523import org .apache .logging .log4j .LogManager ;
2624import org .apache .logging .log4j .Logger ;
4240import org .elasticsearch .index .Index ;
4341import org .elasticsearch .index .shard .ShardId ;
4442
43+ import java .util .Set ;
44+
4545import static org .elasticsearch .cluster .routing .allocation .DiskThresholdSettings .CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING ;
4646import static org .elasticsearch .cluster .routing .allocation .DiskThresholdSettings .CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING ;
4747
@@ -138,12 +138,24 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
138138
139139 // subtractLeavingShards is passed as false here, because they still use disk space, and therefore should we should be extra careful
140140 // and take the size into account
141- DiskUsage usage = getDiskUsage (node , allocation , usages , false );
141+ final DiskUsageWithRelocations usage = getDiskUsage (node , allocation , usages , false );
142142 // First, check that the node currently over the low watermark
143143 double freeDiskPercentage = usage .getFreeDiskAsPercentage ();
144144 // Cache the used disk percentage for displaying disk percentages consistent with documentation
145145 double usedDiskPercentage = usage .getUsedDiskAsPercentage ();
146146 long freeBytes = usage .getFreeBytes ();
147+ if (freeBytes < 0L ) {
148+ final long sizeOfRelocatingShards = sizeOfRelocatingShards (node , allocation , false , usage .getPath ());
149+ logger .debug ("fewer free bytes remaining than the size of all incoming shards: " +
150+ "usage {} on node {} including {} bytes of relocations, preventing allocation" ,
151+ usage , node .nodeId (), sizeOfRelocatingShards );
152+
153+ return allocation .decision (Decision .NO , NAME ,
154+ "the node has fewer free bytes remaining than the total size of all incoming shards: " +
155+ "free space [%sB], relocating shards [%sB]" ,
156+ freeBytes + sizeOfRelocatingShards , sizeOfRelocatingShards );
157+ }
158+
147159 ByteSizeValue freeBytesValue = new ByteSizeValue (freeBytes );
148160 if (logger .isTraceEnabled ()) {
149161 logger .trace ("node [{}] has {}% used disk" , node .nodeId (), usedDiskPercentage );
@@ -240,6 +252,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
240252
241253 // Secondly, check that allocating the shard to this node doesn't put it above the high watermark
242254 final long shardSize = getExpectedShardSize (shardRouting , allocation , 0 );
255+ assert shardSize >= 0 : shardSize ;
243256 double freeSpaceAfterShard = freeDiskPercentageAfterShardAssigned (usage , shardSize );
244257 long freeBytesAfterShard = freeBytes - shardSize ;
245258 if (freeBytesAfterShard < diskThresholdSettings .getFreeBytesThresholdHigh ().getBytes ()) {
@@ -266,6 +279,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
266279 diskThresholdSettings .getHighWatermarkRaw (), usedDiskThresholdHigh , freeSpaceAfterShard );
267280 }
268281
282+ assert freeBytesAfterShard >= 0 : freeBytesAfterShard ;
269283 return allocation .decision (Decision .YES , NAME ,
270284 "enough disk for shard on node, free: [%s], shard size: [%s], free after allocating shard: [%s]" ,
271285 freeBytesValue ,
@@ -287,7 +301,7 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl
287301
288302 // subtractLeavingShards is passed as true here, since this is only for shards remaining, we will *eventually* have enough disk
289303 // since shards are moving away. No new shards will be incoming since in canAllocate we pass false for this check.
290- final DiskUsage usage = getDiskUsage (node , allocation , usages , true );
304+ final DiskUsageWithRelocations usage = getDiskUsage (node , allocation , usages , true );
291305 final String dataPath = clusterInfo .getDataPath (shardRouting );
292306 // If this node is already above the high threshold, the shard cannot remain (get it off!)
293307 final double freeDiskPercentage = usage .getFreeDiskAsPercentage ();
@@ -299,6 +313,16 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl
299313 return allocation .decision (Decision .YES , NAME ,
300314 "this shard is not allocated on the most utilized disk and can remain" );
301315 }
316+ if (freeBytes < 0L ) {
317+ final long sizeOfRelocatingShards = sizeOfRelocatingShards (node , allocation , true , usage .getPath ());
318+ logger .debug ("fewer free bytes remaining than the size of all incoming shards: " +
319+ "usage {} on node {} including {} bytes of relocations, shard cannot remain" ,
320+ usage , node .nodeId (), sizeOfRelocatingShards );
321+ return allocation .decision (Decision .NO , NAME ,
322+ "the shard cannot remain on this node because the node has fewer free bytes remaining than the total size of all " +
323+ "incoming shards: free space [%s], relocating shards [%s]" ,
324+ freeBytes + sizeOfRelocatingShards , sizeOfRelocatingShards );
325+ }
302326 if (freeBytes < diskThresholdSettings .getFreeBytesThresholdHigh ().getBytes ()) {
303327 if (logger .isDebugEnabled ()) {
304328 logger .debug ("less than the required {} free bytes threshold ({} bytes free) on node {}, shard cannot remain" ,
@@ -328,8 +352,8 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl
328352 "there is enough disk on this node for the shard to remain, free: [%s]" , new ByteSizeValue (freeBytes ));
329353 }
330354
331- private DiskUsage getDiskUsage (RoutingNode node , RoutingAllocation allocation ,
332- ImmutableOpenMap <String , DiskUsage > usages , boolean subtractLeavingShards ) {
355+ private DiskUsageWithRelocations getDiskUsage (RoutingNode node , RoutingAllocation allocation ,
356+ ImmutableOpenMap <String , DiskUsage > usages , boolean subtractLeavingShards ) {
333357 DiskUsage usage = usages .get (node .nodeId ());
334358 if (usage == null ) {
335359 // If there is no usage, and we have other nodes in the cluster,
@@ -340,18 +364,14 @@ private DiskUsage getDiskUsage(RoutingNode node, RoutingAllocation allocation,
340364 node .nodeId (), usage .getTotalBytes (), usage .getFreeBytes (), usage .getFreeDiskAsPercentage ());
341365 }
342366 }
343-
344- if (diskThresholdSettings .includeRelocations ()) {
345- long relocatingShardsSize = sizeOfRelocatingShards (node , allocation , subtractLeavingShards , usage .getPath ());
346- DiskUsage usageIncludingRelocations = new DiskUsage (node .nodeId (), node .node ().getName (), usage .getPath (),
347- usage .getTotalBytes (), usage .getFreeBytes () - relocatingShardsSize );
348- if (logger .isTraceEnabled ()) {
349- logger .trace ("usage without relocations: {}" , usage );
350- logger .trace ("usage with relocations: [{} bytes] {}" , relocatingShardsSize , usageIncludingRelocations );
351- }
352- usage = usageIncludingRelocations ;
367+ final DiskUsageWithRelocations diskUsageWithRelocations = new DiskUsageWithRelocations (usage ,
368+ diskThresholdSettings .includeRelocations ()
369+ ? sizeOfRelocatingShards (node , allocation , subtractLeavingShards , usage .getPath ()) : 0 );
370+ if (logger .isTraceEnabled ()) {
371+ logger .trace ("getDiskUsage(subtractLeavingShards={}) returning {}" , subtractLeavingShards , diskUsageWithRelocations );
353372 }
354- return usage ;
373+
374+ return diskUsageWithRelocations ;
355375 }
356376
357377 /**
@@ -381,7 +401,7 @@ DiskUsage averageUsage(RoutingNode node, ImmutableOpenMap<String, DiskUsage> usa
381401 * @param shardSize Size in bytes of the shard
382402 * @return Percentage of free space after the shard is assigned to the node
383403 */
384- double freeDiskPercentageAfterShardAssigned (DiskUsage usage , Long shardSize ) {
404+ double freeDiskPercentageAfterShardAssigned (DiskUsageWithRelocations usage , Long shardSize ) {
385405 shardSize = (shardSize == null ) ? 0 : shardSize ;
386406 DiskUsage newUsage = new DiskUsage (usage .getNodeId (), usage .getNodeName (), usage .getPath (),
387407 usage .getTotalBytes (), usage .getFreeBytes () - shardSize );
@@ -450,4 +470,59 @@ public static long getExpectedShardSize(ShardRouting shard, RoutingAllocation al
450470 }
451471
452472 }
473+
474+ static class DiskUsageWithRelocations {
475+
476+ private final DiskUsage diskUsage ;
477+ private final long relocatingShardSize ;
478+
479+ DiskUsageWithRelocations (DiskUsage diskUsage , long relocatingShardSize ) {
480+ this .diskUsage = diskUsage ;
481+ this .relocatingShardSize = relocatingShardSize ;
482+ }
483+
484+ @ Override
485+ public String toString () {
486+ return "DiskUsageWithRelocations{" +
487+ "diskUsage=" + diskUsage +
488+ ", relocatingShardSize=" + relocatingShardSize +
489+ '}' ;
490+ }
491+
492+ double getFreeDiskAsPercentage () {
493+ if (getTotalBytes () == 0L ) {
494+ return 100.0 ;
495+ }
496+ return 100.0 * ((double )getFreeBytes () / getTotalBytes ());
497+ }
498+
499+ double getUsedDiskAsPercentage () {
500+ return 100.0 - getFreeDiskAsPercentage ();
501+ }
502+
503+ long getFreeBytes () {
504+ try {
505+ return Math .subtractExact (diskUsage .getFreeBytes (), relocatingShardSize );
506+ } catch (ArithmeticException e ) {
507+ return Long .MAX_VALUE ;
508+ }
509+ }
510+
511+ String getPath () {
512+ return diskUsage .getPath ();
513+ }
514+
515+ String getNodeId () {
516+ return diskUsage .getNodeId ();
517+ }
518+
519+ String getNodeName () {
520+ return diskUsage .getNodeName ();
521+ }
522+
523+ long getTotalBytes () {
524+ return diskUsage .getTotalBytes ();
525+ }
526+ }
527+
453528}
0 commit comments