Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.VolumeInfoMetrics;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -367,12 +368,25 @@ public static long getPendingDeletionBlocks(ContainerData containerData) {
public static void assertSpaceAvailability(long containerId, HddsVolume volume, int sizeRequested)
throws StorageContainerException {
final SpaceUsageSource currentUsage = volume.getCurrentUsage();
final long spared = volume.getFreeSpaceToSpare(currentUsage.getCapacity());
final long capacity = currentUsage.getCapacity();
final long available = currentUsage.getAvailable();
final long hardSpare = volume.getFreeSpaceToSpare(capacity);

if (currentUsage.getAvailable() - spared < sizeRequested) {
if (available - hardSpare < sizeRequested) {
VolumeInfoMetrics stats = volume.getVolumeInfoStats();
if (stats != null) {
stats.incNumWriteRequestsRejectedHardMinFreeSpace();
}
throw new StorageContainerException("Failed to write " + sizeRequested + " bytes to container "
+ containerId + " due to volume " + volume + " out of space "
+ currentUsage + ", minimum free space spared=" + spared, DISK_OUT_OF_SPACE);
+ currentUsage + ", minimum free space spared=" + hardSpare, DISK_OUT_OF_SPACE);
}
final long reportedSpare = volume.getReportedFreeSpaceToSpare(capacity);
if (available - reportedSpare < sizeRequested) {
VolumeInfoMetrics stats = volume.getVolumeInfoStats();
if (stats != null) {
stats.incNumWriteRequestsInSoftBandMinFreeSpace();
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import static org.apache.hadoop.hdds.conf.ConfigTag.STORAGE;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.CONFIG_PREFIX;

import com.google.common.annotations.VisibleForTesting;
import java.time.Duration;
import org.apache.hadoop.hdds.conf.Config;
import org.apache.hadoop.hdds.conf.ConfigGroup;
Expand Down Expand Up @@ -73,6 +74,10 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
/** Config key for the min-free-space ratio of a volume (a fraction of its capacity). */
public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT =
"hdds.datanode.volume.min.free.space.percent";
/** Default for {@link #HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT}: 0.02, i.e. 2% of capacity. */
public static final float HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT = 0.02f;
/** Config key for the hard-limit min-free-space ratio of a volume (a fraction of its capacity). */
public static final String HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT =
"hdds.datanode.volume.min.free.space.hard.limit.percent";
/**
 * Default for {@link #HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT}: 0.015,
 * i.e. 1.5% of capacity. It is deliberately below the 2% default of
 * {@link #HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT}.
 */
public static final float HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT =
0.015f;

public static final String WAIT_ON_ALL_FOLLOWERS = "hdds.datanode.wait.on.all.followers";
public static final String CONTAINER_SCHEMA_V3_ENABLED = "hdds.datanode.container.schema.v3.enabled";
Expand Down Expand Up @@ -285,25 +290,36 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
defaultValue = "-1",
type = ConfigType.SIZE,
tags = { OZONE, CONTAINER, STORAGE, MANAGEMENT },
description = "This determines the free space to be used for closing containers" +
" When the difference between volume capacity and used reaches this number," +
" containers that reside on this volume will be closed and no new containers" +
" would be allocated on this volume." +
" Max of min.free.space and min.free.space.percent will be used as final value."
description = "Minimum free space (bytes) applied together with min.free.space.percent "
+ "(reported to SCM in heartbeat as freeSpaceToSpare) and "
+ "min.free.space.hard.limit.percent (local write enforcement). "
+ "The effective value for each tier is max(this bytes, capacity * ratio)."
)
private long minFreeSpace = getDefaultFreeSpace();

@Config(key = "hdds.datanode.volume.min.free.space.percent",
defaultValue = "0.02", // match HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT
type = ConfigType.FLOAT,
tags = { OZONE, CONTAINER, STORAGE, MANAGEMENT },
description = "This determines the free space percent to be used for closing containers" +
" When the difference between volume capacity and used reaches (free.space.percent of volume capacity)," +
" containers that reside on this volume will be closed and no new containers" +
" would be allocated on this volume." +
" Max of min.free.space or min.free.space.percent will be used as final value."
description = "Minimum fraction of volume capacity reported to SCM as freeSpaceToSpare "
+ "(heartbeat / storage reports). Local write rejection uses "
+ "hdds.datanode.volume.min.free.space.hard.limit.percent instead. "
+ "The soft band is the gap between these two (e.g. 2000GB disk: 2% = 40GB reported vs "
+ "1.5% = 30GB hard → 10GB band) where the DN may send close-container actions while "
+ "writes still succeed."
)
private float minFreeSpaceRatio = HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT;
// Fraction of volume capacity used for the locally enforced (hard) min free space.
// validateMinFreeSpace() resets values outside [0, 1] to the default and lowers the
// value to minFreeSpaceRatio when it is configured larger than that ratio.
@Config(key = "hdds.datanode.volume.min.free.space.hard.limit.percent",
defaultValue = "0.015",
type = ConfigType.FLOAT,
tags = { OZONE, CONTAINER, STORAGE, MANAGEMENT },
description = "Minimum fraction of volume capacity reserved for local enforcement: "
+ "writes fail when available space would drop below max(this ratio * capacity, "
+ "hdds.datanode.volume.min.free.space). Should be <= min.free.space.percent "
+ "so SCM can plan for a larger headroom than the DN enforces locally."
)
private float minFreeSpaceHardLimitRatio =
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT;

@Config(key = "hdds.datanode.periodic.disk.check.interval.minutes",
defaultValue = "60",
Expand Down Expand Up @@ -753,6 +769,23 @@ private void validateMinFreeSpace() {
minFreeSpaceRatio);
minFreeSpaceRatio = HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT;
}
if (minFreeSpaceHardLimitRatio > 1 || minFreeSpaceHardLimitRatio < 0) {
LOG.warn("{} = {} is invalid, should be between 0 and 1; resetting to default {}",
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT,
minFreeSpaceHardLimitRatio,
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT);
minFreeSpaceHardLimitRatio =
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT_DEFAULT;
}
if (minFreeSpaceHardLimitRatio > minFreeSpaceRatio) {
LOG.warn("{} = {} must not exceed {} = {}, setting hard limit to soft limit. "
+ "Set hard.limit.percent <= min.free.space.percent to enable the soft band.",
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_HARD_LIMIT_PERCENT,
minFreeSpaceHardLimitRatio,
HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT,
minFreeSpaceRatio);
minFreeSpaceHardLimitRatio = minFreeSpaceRatio;
}

if (minFreeSpace < 0) {
minFreeSpace = getDefaultFreeSpace();
Expand Down Expand Up @@ -819,10 +852,33 @@ public void setContainerCloseThreads(int containerCloseThreads) {
this.containerCloseThreads = containerCloseThreads;
}

/**
 * Computes the min free space that is reported to SCM (freeSpaceToSpare in storage reports).
 *
 * @param capacity volume capacity the ratio is applied to
 * @return the larger of {@code capacity * minFreeSpaceRatio} (truncated to a long) and the
 *     configured absolute {@code minFreeSpace}
 */
public long getMinFreeSpace(long capacity) {
  final long ratioBased = (long) (capacity * minFreeSpaceRatio);
  return ratioBased >= minFreeSpace ? ratioBased : minFreeSpace;
}

/**
 * Computes the min free space enforced locally: the threshold for rejecting writes
 * (disk full / out-of-space) and for choosing a volume for a new container.
 *
 * @param capacity volume capacity the hard-limit ratio is applied to
 * @return the larger of {@code capacity * minFreeSpaceHardLimitRatio} (truncated to a long)
 *     and the configured absolute {@code minFreeSpace}
 */
public long getHardLimitMinFreeSpace(long capacity) {
  final long ratioBased = (long) (capacity * minFreeSpaceHardLimitRatio);
  return ratioBased >= minFreeSpace ? ratioBased : minFreeSpace;
}

/**
* Width of the soft band: reported spare minus hard spare. For example, with 2000GB capacity,
* 2% reported (40GB) and 1.5% hard (30GB), this is 10GB — the gap where the DN may send
* close-container actions while writes still succeed.
*/
@VisibleForTesting
public long getSoftBandMinFreeSpaceWidth(long capacity) {
Comment thread
ashishkumar50 marked this conversation as resolved.
long reported = getMinFreeSpace(capacity);
long hard = getHardLimitMinFreeSpace(capacity);
return Math.max(0L, reported - hard);
}

/**
 * @return the configured absolute min free space, without any capacity ratio applied
 */
public long getMinFreeSpace() {
  return this.minFreeSpace;
}
Expand All @@ -831,6 +887,11 @@ public float getMinFreeSpaceRatio() {
return minFreeSpaceRatio;
}

/**
 * @return the hard-limit min-free-space ratio currently held by this configuration
 */
@VisibleForTesting
public float getMinFreeSpaceHardLimitRatio() {
  return this.minFreeSpaceHardLimitRatio;
}

/**
 * @return the value of the {@code periodicDiskCheckIntervalMinutes} setting
 */
public long getPeriodicDiskCheckIntervalMinutes() {
  return this.periodicDiskCheckIntervalMinutes;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

/**
* Filter for selecting volumes with enough space for a new container.
* Uses the <em>hard</em> min-free spare (same as write checks), not the SCM-reported spare in
* {@link StorageLocationReport#getFreeSpaceToSpare()}. The gap between reported and hard is the
* soft band (e.g. 40GB − 30GB on a 2000GB disk with 2% vs 1.5%).
* Keeps track of ineligible volumes for logging/debug purposes.
*/
public class AvailableSpaceFilter implements Predicate<HddsVolume> {
Expand All @@ -39,9 +42,25 @@ public AvailableSpaceFilter(long requiredSpace) {
@Override
public boolean test(HddsVolume vol) {
StorageLocationReport report = vol.getReport();
long available = report.getUsableSpace();
long capacity = report.getCapacity();
long spareAtHardLimit = vol.getFreeSpaceToSpare(capacity);
long spareReported = vol.getReportedFreeSpaceToSpare(capacity);
long available =
report.getRemaining() - report.getCommitted() - spareAtHardLimit;
long availableAtReportedSpare =
report.getRemaining() - report.getCommitted() - spareReported;

boolean hasEnoughSpace = available > requiredSpace;

VolumeInfoMetrics stats = vol.getVolumeInfoStats();
if (stats != null) {
if (!hasEnoughSpace) {
stats.incNumContainerCreateRequestsRejectedHardMinFreeSpace();
} else if (availableAtReportedSpare <= requiredSpace) {
stats.incNumContainerCreateRequestsInSoftBandMinFreeSpace();
}
}

mostAvailableSpace = Math.max(available, mostAvailableSpace);

if (!hasEnoughSpace) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ protected StorageLocationReport.Builder reportBuilder() {
StorageLocationReport.Builder builder = super.reportBuilder();
if (!builder.isFailed()) {
builder.setCommitted(getCommittedBytes())
.setFreeSpaceToSpare(getFreeSpaceToSpare(builder.getCapacity()));
.setFreeSpaceToSpare(getReportedFreeSpaceToSpare(builder.getCapacity()));
}
return builder;
}
Expand Down Expand Up @@ -409,10 +409,22 @@ public long getCommittedBytes() {
return committedBytes.get();
}

public long getFreeSpaceToSpare(long volumeCapacity) {
/**
* Minimum free space reported to SCM (heartbeat), from
* {@code hdds.datanode.volume.min.free.space.percent}.
*/
public long getReportedFreeSpaceToSpare(long volumeCapacity) {
return getDatanodeConfig().getMinFreeSpace(volumeCapacity);
}

/**
 * Min free space that is enforced locally for writes, driven by
 * {@code hdds.datanode.volume.min.free.space.hard.limit.percent}.
 *
 * @param volumeCapacity capacity of this volume, passed through to the datanode configuration
 * @return the result of {@code getHardLimitMinFreeSpace(volumeCapacity)} on the datanode
 *     configuration
 */
public long getFreeSpaceToSpare(long volumeCapacity) {
  final long hardLimitSpare = getDatanodeConfig().getHardLimitMinFreeSpace(volumeCapacity);
  return hardLimitSpare;
}

@Override
public void setGatherContainerUsages(Function<HddsVolume, Long> gatherContainerUsages) {
this.gatherContainerUsages = gatherContainerUsages;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@ public class VolumeInfoMetrics implements MetricsSource {
@Metric("Number of scans skipped for the volume")
private MutableCounterLong numScansSkipped;

// Min-free-space counters. The two write counters are incremented from
// ContainerUtils.assertSpaceAvailability; the two container-create counters are
// incremented from AvailableSpaceFilter.test.

// Write passed the hard check but did not fit above the reported spare.
@Metric("Write requests allowed while usable space is between the reported (soft) and hard min-free-space thresholds")
private MutableCounterLong numWriteRequestsInSoftBandMinFreeSpace;

// Write failed the hard check and was rejected.
@Metric("Write requests rejected because the hard min-free-space limit would be violated")
private MutableCounterLong numWriteRequestsRejectedHardMinFreeSpace;
// Volume had enough space at the hard spare but not at the reported spare.
@Metric("Container create allowed while usable space is between the reported (soft) " +
"and hard min-free-space thresholds")
private MutableCounterLong numContainerCreateRequestsInSoftBandMinFreeSpace;

// Volume did not have enough space at the hard spare.
@Metric("Container create requests rejected because the hard min-free-space limit would be violated")
private MutableCounterLong numContainerCreateRequestsRejectedHardMinFreeSpace;

/**
* @param identifier Typically, path to volume root. E.g. /data/hdds
*/
Expand Down Expand Up @@ -185,6 +197,38 @@ public void incNumScansSkipped() {
numScansSkipped.incr();
}

/**
 * @return current value of the counter for writes allowed inside the soft band
 */
public long getNumWriteRequestsInSoftBandMinFreeSpace() {
  final long count = this.numWriteRequestsInSoftBandMinFreeSpace.value();
  return count;
}

/**
 * Increments the counter for writes allowed inside the soft band.
 */
public void incNumWriteRequestsInSoftBandMinFreeSpace() {
  this.numWriteRequestsInSoftBandMinFreeSpace.incr();
}

/**
 * @return current value of the counter for writes rejected at the hard limit
 */
public long getNumWriteRequestsRejectedHardMinFreeSpace() {
  final long count = this.numWriteRequestsRejectedHardMinFreeSpace.value();
  return count;
}

/**
 * Increments the counter for writes rejected at the hard limit.
 */
public void incNumWriteRequestsRejectedHardMinFreeSpace() {
  this.numWriteRequestsRejectedHardMinFreeSpace.incr();
}

/**
 * @return current value of the counter for container creates allowed inside the soft band
 */
public long getNumContainerCreateRequestsInSoftBandMinFreeSpace() {
  final long count = this.numContainerCreateRequestsInSoftBandMinFreeSpace.value();
  return count;
}

/**
 * Increments the counter for container creates allowed inside the soft band.
 */
public void incNumContainerCreateRequestsInSoftBandMinFreeSpace() {
  this.numContainerCreateRequestsInSoftBandMinFreeSpace.incr();
}

/**
 * @return current value of the counter for container creates rejected at the hard limit
 */
public long getNumContainerCreateRequestsRejectedHardMinFreeSpace() {
  final long count = this.numContainerCreateRequestsRejectedHardMinFreeSpace.value();
  return count;
}

/**
 * Increments the counter for container creates rejected at the hard limit.
 */
public void incNumContainerCreateRequestsRejectedHardMinFreeSpace() {
  this.numContainerCreateRequestsRejectedHardMinFreeSpace.incr();
}

@Override
public void getMetrics(MetricsCollector collector, boolean all) {
MetricsRecordBuilder builder = collector.addRecord(metricsSourceName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.io.File;
Expand All @@ -42,13 +44,18 @@
import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.fs.SpaceUsageSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.ByteStringConversion;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.ozone.common.ChunkBuffer;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.VolumeInfoMetrics;
import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -227,4 +234,41 @@ private static void assertDetailsEquals(DatanodeDetails expected,
assertEquals(expected.getInitialVersion(), actual.getInitialVersion());
assertEquals(expected.getIpAddress(), actual.getIpAddress());
}

/**
 * With a hard spare of 30 and a reported spare of 100 stubbed on the volume, a 50-byte write
 * is expected to be accepted and counted in the soft band only.
 * NOTE(review): assumes SpaceUsageSource.Fixed takes (capacity, available, used) — confirm.
 */
@Test
public void assertSpaceAvailabilityIncrementsSoftBandWhenBetweenReportedAndHard()
    throws Exception {
  final long capacity = 1000L;
  final SpaceUsageSource.Fixed fixedUsage = new SpaceUsageSource.Fixed(capacity, 100L, 900L);
  final VolumeInfoMetrics volumeStats = mock(VolumeInfoMetrics.class);
  final HddsVolume mockedVolume = mock(HddsVolume.class);

  when(mockedVolume.toString()).thenReturn("mockVolume");
  when(mockedVolume.getVolumeInfoStats()).thenReturn(volumeStats);
  when(mockedVolume.getCurrentUsage()).thenReturn(fixedUsage);
  when(mockedVolume.getFreeSpaceToSpare(capacity)).thenReturn(30L);
  when(mockedVolume.getReportedFreeSpaceToSpare(capacity)).thenReturn(100L);

  // Must not throw: the request fits above the hard spare.
  ContainerUtils.assertSpaceAvailability(1L, mockedVolume, 50);

  verify(volumeStats, never()).incNumWriteRequestsRejectedHardMinFreeSpace();
  verify(volumeStats).incNumWriteRequestsInSoftBandMinFreeSpace();
}

/**
 * With the same stubbing (hard spare 30, reported spare 100), an 80-byte write is expected to
 * be refused with DISK_OUT_OF_SPACE and counted as a hard rejection only.
 * NOTE(review): assumes SpaceUsageSource.Fixed takes (capacity, available, used) — confirm.
 */
@Test
public void assertSpaceAvailabilityIncrementsHardRejectWhenHardLimitViolated() {
  final long capacity = 1000L;
  final SpaceUsageSource.Fixed fixedUsage = new SpaceUsageSource.Fixed(capacity, 100L, 900L);
  final VolumeInfoMetrics volumeStats = mock(VolumeInfoMetrics.class);
  final HddsVolume mockedVolume = mock(HddsVolume.class);

  when(mockedVolume.toString()).thenReturn("mockVolume");
  when(mockedVolume.getVolumeInfoStats()).thenReturn(volumeStats);
  when(mockedVolume.getCurrentUsage()).thenReturn(fixedUsage);
  when(mockedVolume.getFreeSpaceToSpare(capacity)).thenReturn(30L);
  when(mockedVolume.getReportedFreeSpaceToSpare(capacity)).thenReturn(100L);

  final StorageContainerException thrown = assertThrows(StorageContainerException.class,
      () -> ContainerUtils.assertSpaceAvailability(1L, mockedVolume, 80));
  assertEquals(Result.DISK_OUT_OF_SPACE, thrown.getResult());

  // The exception is raised before the soft-band check, so that counter stays untouched.
  verify(volumeStats, never()).incNumWriteRequestsInSoftBandMinFreeSpace();
  verify(volumeStats).incNumWriteRequestsRejectedHardMinFreeSpace();
}
}
Loading