diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java index 7d16546fb69f..f2817e37b51c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java @@ -58,6 +58,7 @@ import org.apache.hadoop.ozone.container.common.impl.ContainerSet; import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.VolumeInfoMetrics; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -367,12 +368,25 @@ public static long getPendingDeletionBlocks(ContainerData containerData) { public static void assertSpaceAvailability(long containerId, HddsVolume volume, int sizeRequested) throws StorageContainerException { final SpaceUsageSource currentUsage = volume.getCurrentUsage(); - final long spared = volume.getFreeSpaceToSpare(currentUsage.getCapacity()); + final long capacity = currentUsage.getCapacity(); + final long available = currentUsage.getAvailable(); + final long hardSpare = volume.getFreeSpaceToSpare(capacity); - if (currentUsage.getAvailable() - spared < sizeRequested) { + if (available - hardSpare < sizeRequested) { + VolumeInfoMetrics stats = volume.getVolumeInfoStats(); + if (stats != null) { + stats.incNumWriteRequestsRejectedHardMinFreeSpace(); + } throw new StorageContainerException("Failed to write " + sizeRequested + " bytes to container " + containerId + " due to volume " + volume + " out of space " - + currentUsage + ", minimum free space spared=" + spared, DISK_OUT_OF_SPACE); + + currentUsage + ", 
minimum free space spared=" + hardSpare, DISK_OUT_OF_SPACE); + } + final long reportedSpare = volume.getReportedFreeSpaceToSpare(capacity); + if (available - reportedSpare < sizeRequested) { + VolumeInfoMetrics stats = volume.getVolumeInfoStats(); + if (stats != null) { + stats.incNumWriteRequestsInSoftBandMinFreeSpace(); + } } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index 41f6d36971ff..1bbcff7a5792 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -25,6 +25,7 @@ import static org.apache.hadoop.hdds.conf.ConfigTag.STORAGE; import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.CONFIG_PREFIX; +import com.google.common.annotations.VisibleForTesting; import java.time.Duration; import org.apache.hadoop.hdds.conf.Config; import org.apache.hadoop.hdds.conf.ConfigGroup; @@ -73,6 +74,10 @@ public class DatanodeConfiguration extends ReconfigurableConfig { public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT = "hdds.datanode.volume.min.free.space.percent"; public static final float HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT = 0.02f; + public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT = + "hdds.datanode.volume.min.free.space.hard.limit.percent"; + public static final float HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT = + 0.015f; public static final String WAIT_ON_ALL_FOLLOWERS = "hdds.datanode.wait.on.all.followers"; public static final String CONTAINER_SCHEMA_V3_ENABLED = "hdds.datanode.container.schema.v3.enabled"; @@ 
-285,11 +290,10 @@ public class DatanodeConfiguration extends ReconfigurableConfig { defaultValue = "-1", type = ConfigType.SIZE, tags = { OZONE, CONTAINER, STORAGE, MANAGEMENT }, - description = "This determines the free space to be used for closing containers" + - " When the difference between volume capacity and used reaches this number," + - " containers that reside on this volume will be closed and no new containers" + - " would be allocated on this volume." + - " Max of min.free.space and min.free.space.percent will be used as final value." + description = "Minimum free space (bytes) applied together with min.free.space.percent " + + "(reported to SCM in heartbeat as freeSpaceToSpare) and " + + "min.free.space.hard.limit.percent (local write enforcement). " + + "The effective value for each tier is max(this bytes, capacity * ratio)." ) private long minFreeSpace = getDefaultFreeSpace(); @@ -297,13 +301,25 @@ public class DatanodeConfiguration extends ReconfigurableConfig { defaultValue = "0.02", // match HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT type = ConfigType.FLOAT, tags = { OZONE, CONTAINER, STORAGE, MANAGEMENT }, - description = "This determines the free space percent to be used for closing containers" + - " When the difference between volume capacity and used reaches (free.space.percent of volume capacity)," + - " containers that reside on this volume will be closed and no new containers" + - " would be allocated on this volume." + - " Max of min.free.space or min.free.space.percent will be used as final value." + description = "Minimum fraction of volume capacity reported to SCM as freeSpaceToSpare " + + "(heartbeat / storage reports). Local write rejection uses " + + "hdds.datanode.volume.min.free.space.hard.limit.percent instead. " + + "The soft band is the gap between these two (e.g. 2000GB disk: 2% = 40GB reported vs " + + "1.5% = 30GB hard → 10GB band) where the DN may send close-container actions while " + + "writes still succeed." 
) private float minFreeSpaceRatio = HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT; + @Config(key = "hdds.datanode.volume.min.free.space.hard.limit.percent", + defaultValue = "0.015", + type = ConfigType.FLOAT, + tags = { OZONE, CONTAINER, STORAGE, MANAGEMENT }, + description = "Minimum fraction of volume capacity reserved for local enforcement: " + + "writes fail when available space would drop below max(this ratio * capacity, " + + "hdds.datanode.volume.min.free.space). Should be <= min.free.space.percent " + + "so SCM can plan for a larger headroom than the DN enforces locally." + ) + private float minFreeSpaceHardLimitRatio = + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT; @Config(key = "hdds.datanode.periodic.disk.check.interval.minutes", defaultValue = "60", @@ -753,6 +769,23 @@ private void validateMinFreeSpace() { minFreeSpaceRatio); minFreeSpaceRatio = HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT; } + if (minFreeSpaceHardLimitRatio > 1 || minFreeSpaceHardLimitRatio < 0) { + LOG.warn("{} = {} is invalid, should be between 0 and 1; resetting to default {}", + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT, + minFreeSpaceHardLimitRatio, + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT); + minFreeSpaceHardLimitRatio = + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT; + } + if (minFreeSpaceHardLimitRatio > minFreeSpaceRatio) { + LOG.warn("{} = {} must not exceed {} = {}, setting hard limit to soft limit. 
" + + "Set hard.limit.percent <= min.free.space.percent to enable the soft band.", + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT, + minFreeSpaceHardLimitRatio, + HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT, + minFreeSpaceRatio); + minFreeSpaceHardLimitRatio = minFreeSpaceRatio; + } if (minFreeSpace < 0) { minFreeSpace = getDefaultFreeSpace(); @@ -819,10 +852,33 @@ public void setContainerCloseThreads(int containerCloseThreads) { this.containerCloseThreads = containerCloseThreads; } + /** + * Minimum free space reported to SCM (freeSpaceToSpare in storage reports). + */ public long getMinFreeSpace(long capacity) { return Math.max((long) (capacity * minFreeSpaceRatio), minFreeSpace); } + /** + * Minimum free space enforced locally for writes (disk full / out-of-space) + * and for choosing a volume for a new container (same threshold as writes). + */ + public long getHardLimitMinFreeSpace(long capacity) { + return Math.max((long) (capacity * minFreeSpaceHardLimitRatio), minFreeSpace); + } + + /** + * Width of the soft band: reported spare minus hard spare. For example, with 2000GB capacity, + * 2% reported (40GB) and 1.5% hard (30GB), this is 10GB — the gap where the DN may send + * close-container actions while writes still succeed. 
+ */ + @VisibleForTesting + public long getSoftBandMinFreeSpaceWidth(long capacity) { + long reported = getMinFreeSpace(capacity); + long hard = getHardLimitMinFreeSpace(capacity); + return Math.max(0L, reported - hard); + } + public long getMinFreeSpace() { return minFreeSpace; } @@ -831,6 +887,11 @@ public float getMinFreeSpaceRatio() { return minFreeSpaceRatio; } + @VisibleForTesting + public float getMinFreeSpaceHardLimitRatio() { + return minFreeSpaceHardLimitRatio; + } + public long getPeriodicDiskCheckIntervalMinutes() { return periodicDiskCheckIntervalMinutes; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/AvailableSpaceFilter.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/AvailableSpaceFilter.java index bbd2bc97517f..0b320756cef3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/AvailableSpaceFilter.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/AvailableSpaceFilter.java @@ -24,6 +24,9 @@ /** * Filter for selecting volumes with enough space for a new container. + * Uses the hard min-free spare (same as write checks), not the SCM-reported spare in + * {@link StorageLocationReport#getFreeSpaceToSpare()}. The gap between reported and hard is the + * soft band (e.g. 40GB − 30GB on a 2000GB disk with 2% vs 1.5%). * Keeps track of ineligible volumes for logging/debug purposes. 
*/ public class AvailableSpaceFilter implements Predicate { @@ -39,9 +42,25 @@ public AvailableSpaceFilter(long requiredSpace) { @Override public boolean test(HddsVolume vol) { StorageLocationReport report = vol.getReport(); - long available = report.getUsableSpace(); + long capacity = report.getCapacity(); + long spareAtHardLimit = vol.getFreeSpaceToSpare(capacity); + long spareReported = vol.getReportedFreeSpaceToSpare(capacity); + long available = + report.getRemaining() - report.getCommitted() - spareAtHardLimit; + long availableAtReportedSpare = + report.getRemaining() - report.getCommitted() - spareReported; + boolean hasEnoughSpace = available > requiredSpace; + VolumeInfoMetrics stats = vol.getVolumeInfoStats(); + if (stats != null) { + if (!hasEnoughSpace) { + stats.incNumContainerCreateRequestsRejectedHardMinFreeSpace(); + } else if (availableAtReportedSpare <= requiredSpace) { + stats.incNumContainerCreateRequestsInSoftBandMinFreeSpace(); + } + } + mostAvailableSpace = Math.max(available, mostAvailableSpace); if (!hasEnoughSpace) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index a4f1afa08ea8..46fb0d741a2f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -207,7 +207,7 @@ protected StorageLocationReport.Builder reportBuilder() { StorageLocationReport.Builder builder = super.reportBuilder(); if (!builder.isFailed()) { builder.setCommitted(getCommittedBytes()) - .setFreeSpaceToSpare(getFreeSpaceToSpare(builder.getCapacity())); + .setFreeSpaceToSpare(getReportedFreeSpaceToSpare(builder.getCapacity())); } return builder; } @@ -409,10 +409,22 @@ public long getCommittedBytes() { return 
committedBytes.get(); } - public long getFreeSpaceToSpare(long volumeCapacity) { + /** + * Minimum free space reported to SCM (heartbeat), from + * {@code hdds.datanode.volume.min.free.space.percent}. + */ + public long getReportedFreeSpaceToSpare(long volumeCapacity) { return getDatanodeConfig().getMinFreeSpace(volumeCapacity); } + /** + * Minimum free space enforced locally for writes (see + * {@code hdds.datanode.volume.min.free.space.hard.limit.percent}). + */ + public long getFreeSpaceToSpare(long volumeCapacity) { + return getDatanodeConfig().getHardLimitMinFreeSpace(volumeCapacity); + } + @Override public void setGatherContainerUsages(Function gatherContainerUsages) { this.gatherContainerUsages = gatherContainerUsages; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java index 8340c1c4f7fa..0cb0c9d56a98 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/VolumeInfoMetrics.java @@ -76,6 +76,18 @@ public class VolumeInfoMetrics implements MetricsSource { @Metric("Number of scans skipped for the volume") private MutableCounterLong numScansSkipped; + @Metric("Write requests allowed while usable space is between the reported (soft) and hard min-free-space thresholds") + private MutableCounterLong numWriteRequestsInSoftBandMinFreeSpace; + + @Metric("Write requests rejected because the hard min-free-space limit would be violated") + private MutableCounterLong numWriteRequestsRejectedHardMinFreeSpace; + @Metric("Container create allowed while usable space is between the reported (soft) " + + "and hard min-free-space thresholds") + private MutableCounterLong numContainerCreateRequestsInSoftBandMinFreeSpace; 
+ + @Metric("Container create requests rejected because the hard min-free-space limit would be violated") + private MutableCounterLong numContainerCreateRequestsRejectedHardMinFreeSpace; + /** * @param identifier Typically, path to volume root. E.g. /data/hdds */ @@ -185,6 +197,38 @@ public void incNumScansSkipped() { numScansSkipped.incr(); } + public long getNumWriteRequestsInSoftBandMinFreeSpace() { + return numWriteRequestsInSoftBandMinFreeSpace.value(); + } + + public void incNumWriteRequestsInSoftBandMinFreeSpace() { + numWriteRequestsInSoftBandMinFreeSpace.incr(); + } + + public long getNumWriteRequestsRejectedHardMinFreeSpace() { + return numWriteRequestsRejectedHardMinFreeSpace.value(); + } + + public void incNumWriteRequestsRejectedHardMinFreeSpace() { + numWriteRequestsRejectedHardMinFreeSpace.incr(); + } + + public long getNumContainerCreateRequestsInSoftBandMinFreeSpace() { + return numContainerCreateRequestsInSoftBandMinFreeSpace.value(); + } + + public void incNumContainerCreateRequestsInSoftBandMinFreeSpace() { + numContainerCreateRequestsInSoftBandMinFreeSpace.incr(); + } + + public long getNumContainerCreateRequestsRejectedHardMinFreeSpace() { + return numContainerCreateRequestsRejectedHardMinFreeSpace.value(); + } + + public void incNumContainerCreateRequestsRejectedHardMinFreeSpace() { + numContainerCreateRequestsRejectedHardMinFreeSpace.incr(); + } + @Override public void getMetrics(MetricsCollector collector, boolean all) { MetricsRecordBuilder builder = collector.addRecord(metricsSourceName); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java index e262e795aa66..549fc126aef6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java +++ 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java @@ -29,6 +29,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.io.File; @@ -42,13 +44,18 @@ import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.fs.SpaceUsageSource; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ByteStringConversion; import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.common.ChunkBuffer; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.VolumeInfoMetrics; import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -227,4 +234,41 @@ private static void assertDetailsEquals(DatanodeDetails expected, assertEquals(expected.getInitialVersion(), actual.getInitialVersion()); assertEquals(expected.getIpAddress(), actual.getIpAddress()); } + + @Test + public void assertSpaceAvailabilityIncrementsSoftBandWhenBetweenReportedAndHard() + throws Exception { + HddsVolume volume = mock(HddsVolume.class); + VolumeInfoMetrics metrics = 
mock(VolumeInfoMetrics.class); + SpaceUsageSource.Fixed usage = new SpaceUsageSource.Fixed(1000L, 100L, 900L); + when(volume.getCurrentUsage()).thenReturn(usage); + when(volume.getFreeSpaceToSpare(1000L)).thenReturn(30L); + when(volume.getReportedFreeSpaceToSpare(1000L)).thenReturn(100L); + when(volume.getVolumeInfoStats()).thenReturn(metrics); + when(volume.toString()).thenReturn("mockVolume"); + + ContainerUtils.assertSpaceAvailability(1L, volume, 50); + + verify(metrics).incNumWriteRequestsInSoftBandMinFreeSpace(); + verify(metrics, never()).incNumWriteRequestsRejectedHardMinFreeSpace(); + } + + @Test + public void assertSpaceAvailabilityIncrementsHardRejectWhenHardLimitViolated() { + HddsVolume volume = mock(HddsVolume.class); + VolumeInfoMetrics metrics = mock(VolumeInfoMetrics.class); + SpaceUsageSource.Fixed usage = new SpaceUsageSource.Fixed(1000L, 100L, 900L); + when(volume.getCurrentUsage()).thenReturn(usage); + when(volume.getFreeSpaceToSpare(1000L)).thenReturn(30L); + when(volume.getReportedFreeSpaceToSpare(1000L)).thenReturn(100L); + when(volume.getVolumeInfoStats()).thenReturn(metrics); + when(volume.toString()).thenReturn("mockVolume"); + + StorageContainerException ex = assertThrows(StorageContainerException.class, + () -> ContainerUtils.assertSpaceAvailability(1L, volume, 80)); + assertEquals(Result.DISK_OUT_OF_SPACE, ex.getResult()); + + verify(metrics).incNumWriteRequestsRejectedHardMinFreeSpace(); + verify(metrics, never()).incNumWriteRequestsInSoftBandMinFreeSpace(); + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestDatanodeConfiguration.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestDatanodeConfiguration.java index 5012526782aa..0ea65ad6e529 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestDatanodeConfiguration.java +++ 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestDatanodeConfiguration.java @@ -29,6 +29,7 @@ import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.FAILED_DB_VOLUMES_TOLERATED_KEY; import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.FAILED_METADATA_VOLUMES_TOLERATED_KEY; import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.FAILED_VOLUMES_TOLERATED_DEFAULT; +import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT; import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT; import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT; import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.PERIODIC_DISK_CHECK_INTERVAL_MINUTES_KEY; @@ -184,12 +185,22 @@ public void isCreatedWitDefaultValues() { subject.getBlockDeleteCommandWorkerInterval()); assertEquals(DatanodeConfiguration.getDefaultFreeSpace(), subject.getMinFreeSpace()); assertEquals(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT, subject.getMinFreeSpaceRatio()); + assertEquals(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT, + subject.getMinFreeSpaceHardLimitRatio()); final long oneGB = 1024 * 1024 * 1024; // capacity is less, consider default min_free_space assertEquals(DatanodeConfiguration.getDefaultFreeSpace(), subject.getMinFreeSpace(oneGB)); + assertEquals(DatanodeConfiguration.getDefaultFreeSpace(), subject.getHardLimitMinFreeSpace(oneGB)); + assertEquals(0L, subject.getSoftBandMinFreeSpaceWidth(oneGB)); // capacity is large, consider min_free_space_percent, max(min_free_space, min_free_space_percent * capacity)
assertEquals(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT * oneGB * oneGB, subject.getMinFreeSpace(oneGB * oneGB)); + assertEquals(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT * oneGB * oneGB, + subject.getHardLimitMinFreeSpace(oneGB * oneGB)); + // e.g. 2000GB: 40GB reported − 30GB hard = 10GB soft bandwidth (derived, not configured) + assertEquals( + subject.getMinFreeSpace(oneGB * oneGB) - subject.getHardLimitMinFreeSpace(oneGB * oneGB), + subject.getSoftBandMinFreeSpaceWidth(oneGB * oneGB)); // Verify that no warnings were logged when using default values String logOutput = logCapturer.getOutput(); @@ -224,6 +235,34 @@ void useMaxIfBothMinFreeSpacePropertiesSet() { } } + /** + * If hard limit percent is greater than soft (reported) percent, {@link DatanodeConfiguration} + * uses the hard threshold for SCM-reported spare as well, so there is no negative "soft band". + */ + @Test + void whenHardRatioExceedsSoftRatioReportedSpareMatchesHardOnly() { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.unset(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE); + conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT, 0.01f); + conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT, 0.02f); + + DatanodeConfiguration subject = conf.getObject(DatanodeConfiguration.class); + long capacityBytes = 1000L * 1024 * 1024 * 1024; + + assertEquals(subject.getHardLimitMinFreeSpace(capacityBytes), + subject.getMinFreeSpace(capacityBytes)); + assertEquals(0L, subject.getSoftBandMinFreeSpaceWidth(capacityBytes)); + } + + @Test + void rejectsInvalidMinFreeSpaceHardLimitRatio() { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT, 1.5f); + DatanodeConfiguration subject = conf.getObject(DatanodeConfiguration.class); + 
assertEquals(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT, + subject.getMinFreeSpaceHardLimitRatio()); + } + @ParameterizedTest @ValueSource(longs = {1_000, 10_000, 100_000}) void usesFixedMinFreeSpace(long bytes) { @@ -231,6 +270,8 @@ void usesFixedMinFreeSpace(long bytes) { conf.setLong(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE, bytes); // keeping %cent low so that min free space is picked up conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT, 0.00001f); + conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT, + 0.00001f); DatanodeConfiguration subject = conf.getObject(DatanodeConfiguration.class); @@ -247,7 +288,10 @@ void calculatesMinFreeSpaceRatio(int percent) { OzoneConfiguration conf = new OzoneConfiguration(); // keeping min free space low so that %cent is picked up after calculation conf.set(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE, "1000"); // set in ozone-site.xml - conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT, percent / 100.0f); + float softRatio = percent / 100.0f; + conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT, softRatio); + conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT, + Math.min(softRatio, 0.01f)); DatanodeConfiguration subject = conf.getObject(DatanodeConfiguration.class); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestAvailableSpaceFilter.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestAvailableSpaceFilter.java new file mode 100644 index 000000000000..d1ec64fb180e --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestAvailableSpaceFilter.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.volume; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link AvailableSpaceFilter}. 
+ */ +public class TestAvailableSpaceFilter { + + @Test + public void testIncrementsSoftBandWhenBetweenReportedAndHard() { + HddsVolume volume = mock(HddsVolume.class); + VolumeInfoMetrics metrics = mock(VolumeInfoMetrics.class); + StorageLocationReport report = mock(StorageLocationReport.class); + when(volume.getReport()).thenReturn(report); + when(report.getCapacity()).thenReturn(1000L); + when(report.getRemaining()).thenReturn(100L); + when(report.getCommitted()).thenReturn(0L); + when(volume.getFreeSpaceToSpare(1000L)).thenReturn(30L); + when(volume.getReportedFreeSpaceToSpare(1000L)).thenReturn(100L); + when(volume.getVolumeInfoStats()).thenReturn(metrics); + + AvailableSpaceFilter filter = new AvailableSpaceFilter(50L); + assertTrue(filter.test(volume)); + + verify(metrics).incNumContainerCreateRequestsInSoftBandMinFreeSpace(); + verify(metrics, never()).incNumContainerCreateRequestsRejectedHardMinFreeSpace(); + } + + @Test + public void testIncrementsHardRejectWhenHardLimitViolated() { + HddsVolume volume = mock(HddsVolume.class); + VolumeInfoMetrics metrics = mock(VolumeInfoMetrics.class); + StorageLocationReport report = mock(StorageLocationReport.class); + when(volume.getReport()).thenReturn(report); + when(report.getCapacity()).thenReturn(1000L); + when(report.getRemaining()).thenReturn(100L); + when(report.getCommitted()).thenReturn(0L); + when(volume.getFreeSpaceToSpare(1000L)).thenReturn(30L); + when(volume.getReportedFreeSpaceToSpare(1000L)).thenReturn(100L); + when(volume.getVolumeInfoStats()).thenReturn(metrics); + + AvailableSpaceFilter filter = new AvailableSpaceFilter(80L); + assertFalse(filter.test(volume)); + + verify(metrics).incNumContainerCreateRequestsRejectedHardMinFreeSpace(); + verify(metrics, never()).incNumContainerCreateRequestsInSoftBandMinFreeSpace(); + } + + @Test + public void testNoMetricIncrementWhenWellAboveSoftBand() { + HddsVolume volume = mock(HddsVolume.class); + VolumeInfoMetrics metrics = mock(VolumeInfoMetrics.class); + 
StorageLocationReport report = mock(StorageLocationReport.class); + when(volume.getReport()).thenReturn(report); + when(report.getCapacity()).thenReturn(1000L); + when(report.getRemaining()).thenReturn(1000L); + when(report.getCommitted()).thenReturn(0L); + when(volume.getFreeSpaceToSpare(1000L)).thenReturn(30L); + when(volume.getReportedFreeSpaceToSpare(1000L)).thenReturn(100L); + when(volume.getVolumeInfoStats()).thenReturn(metrics); + + AvailableSpaceFilter filter = new AvailableSpaceFilter(50L); + assertTrue(filter.test(volume)); + + verify(metrics, never()).incNumContainerCreateRequestsInSoftBandMinFreeSpace(); + verify(metrics, never()).incNumContainerCreateRequestsRejectedHardMinFreeSpace(); +} } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestCapacityVolumeChoosingPolicy.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestCapacityVolumeChoosingPolicy.java index deae4f83951b..7917ebf80bd9 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestCapacityVolumeChoosingPolicy.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestCapacityVolumeChoosingPolicy.java @@ -129,7 +129,7 @@ public void throwsDiskOutOfSpaceIfRequestMoreThanAvailable() { String msg = e.getMessage(); assertThat(msg) .contains("No volumes have enough space for a new container. 
" + - "Most available space: 240 bytes"); + "Most available space: 243 bytes"); } @Test diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestRoundRobinVolumeChoosingPolicy.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestRoundRobinVolumeChoosingPolicy.java index 36fabff1fe87..91bc9caa2c24 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestRoundRobinVolumeChoosingPolicy.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestRoundRobinVolumeChoosingPolicy.java @@ -114,7 +114,7 @@ public void throwsDiskOutOfSpaceIfRequestMoreThanAvailable() { String msg = e.getMessage(); assertThat(msg).contains("No volumes have enough space for a new container. " + - "Most available space: 140 bytes"); + "Most available space: 143 bytes"); } @Test diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java index 9ceb0a99e9f0..abfef6fbffdc 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestReplicationSupervisor.java @@ -419,7 +419,7 @@ public void testReplicationImportReserveSpace(ContainerLayoutVersion layout) assertEquals(0, usedSpace); // Increase committed bytes so that volume has only remaining 3 times container size space long minFreeSpace = - conf.getObject(DatanodeConfiguration.class).getMinFreeSpace(vol1.getCurrentUsage().getCapacity()); + conf.getObject(DatanodeConfiguration.class).getHardLimitMinFreeSpace(vol1.getCurrentUsage().getCapacity()); long initialCommittedBytes = 
vol1.getCurrentUsage().getCapacity() - containerMaxSize * 3 - minFreeSpace; vol1.incCommittedBytes(initialCommittedBytes); ContainerReplicator replicator = diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/TestDatanodeMinFreeSpaceIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/TestDatanodeMinFreeSpaceIntegration.java new file mode 100644 index 000000000000..ca0e231a2077 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/TestDatanodeMinFreeSpaceIntegration.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.dn; + +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.function.BooleanSupplier; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.hdds.scm.node.DatanodeInfo; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.ozone.HddsDatanodeService; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +/** + * Integration tests: For min free space as hard and soft limit. 
+ */ +@Timeout(300) +public class TestDatanodeMinFreeSpaceIntegration { + + @Test + public void storageReportsAtScmMatchSoftMinFreeSpaceFromConfig() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.unset(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE); + conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT, 0.03f); + conf.setFloat(DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT, 0.015f); + conf.setTimeDuration(HDDS_HEARTBEAT_INTERVAL, 2, SECONDS); + + DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class); + + try (MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(1) + .build()) { + cluster.waitForClusterToBeReady(); + cluster.waitTobeOutOfSafeMode(); + + HddsDatanodeService dnService = cluster.getHddsDatanodes().get(0); + DatanodeDetails dn = dnService.getDatanodeDetails(); + + StorageContainerManager scm = cluster.getStorageContainerManager(); + NodeManager nm = scm.getScmNodeManager(); + + BooleanSupplier softSpareVisibleAtScm = + () -> storageReportsMatchSoftMinFree(nm, dn, dnConf); + GenericTestUtils.waitFor(softSpareVisibleAtScm, 500, 120_000); + + DatanodeInfo info = nm.getDatanodeInfo(dn); + assertNotNull(info); + assertFalse(info.getStorageReports().isEmpty()); + + for (StorageReportProto report : info.getStorageReports()) { + if (report.getFailed()) { + continue; + } + long capacity = report.getCapacity(); + assertTrue(capacity > 0, "data volume should have positive capacity"); + + long expectedSoft = dnConf.getMinFreeSpace(capacity); + long expectedHard = dnConf.getHardLimitMinFreeSpace(capacity); + long expectedBand = dnConf.getSoftBandMinFreeSpaceWidth(capacity); + + assertEquals(expectedSoft, report.getFreeSpaceToSpare(), + "freeSpaceToSpare in SCM storage report should match soft min-free for capacity"); + assertThat(expectedSoft).isGreaterThanOrEqualTo(expectedHard); + 
assertThat(expectedBand).isGreaterThan(0L); + assertEquals(expectedBand, expectedSoft - expectedHard); + } + } + } + + /** + * SCM has caught up with DN heartbeats: every non-failed data report's {@code freeSpaceToSpare} + * equals the configured soft min-free for that volume capacity. + */ + private static boolean storageReportsMatchSoftMinFree( + NodeManager nm, DatanodeDetails dn, DatanodeConfiguration dnConf) { + DatanodeInfo info = nm.getDatanodeInfo(dn); + if (info == null || info.getStorageReports().isEmpty()) { + return false; + } + boolean anyDataVolume = false; + for (StorageReportProto r : info.getStorageReports()) { + if (r.getFailed() || r.getCapacity() <= 0) { + continue; + } + anyDataVolume = true; + long expectedSoft = dnConf.getMinFreeSpace(r.getCapacity()); + if (expectedSoft != r.getFreeSpaceToSpare()) { + return false; + } + } + return anyDataVolume; + } +}