diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index e159eb6948b8..a8524fcaa661 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3607,11 +3607,23 @@ ozone.recon.scm.container.threshold - 100 + 10000 + OZONE, RECON, SCM + + Non-OPEN container count drift threshold above which Recon escalates from + incremental SCM container sync to a full SCM DB snapshot sync. Missing + OPEN containers stay on the incremental path because they are short-lived + and can be repaired cheaply without replacing the full SCM DB. + + + + ozone.recon.scm.per.state.drift.threshold + 5 OZONE, RECON, SCM - Threshold value for the difference in number of containers - in SCM and RECON. + Per-state lifecycle drift threshold used when SCM and Recon total container + counts are equal. If OPEN, QUASI_CLOSED, or derived CLOSED counts differ by + more than this value, Recon triggers a targeted SCM container sync. @@ -4600,6 +4612,35 @@ Interval in MINUTES by Recon to request SCM DB Snapshot. + + ozone.recon.scm.container.sync.task.initial.delay + 2m + OZONE, MANAGEMENT, RECON, SCM + + Initial delay before Recon starts the incremental SCM container sync task. + This is slightly later than the SCM snapshot initial delay so the snapshot + can initialize Recon's SCM DB before the first incremental sync runs. + + + + ozone.recon.scm.container.sync.task.interval.delay + 1h + OZONE, MANAGEMENT, RECON, SCM + + Interval between incremental SCM container sync runs in Recon. Each cycle + evaluates drift between SCM and Recon and either runs the targeted + multi-pass sync or takes no action. + + + + ozone.recon.scm.deleted.container.check.batch.size + 500 + OZONE, RECON, SCM, PERFORMANCE + + Maximum number of CLOSED or QUASI_CLOSED Recon containers checked against + SCM per incremental sync cycle for DELETING or DELETED retirement. 
+ + ozone.om.snapshot.compaction.dag.max.time.allowed 30d diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 56e2ef6408fc..c342a9a9e228 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -1248,10 +1248,12 @@ public long getContainerCount() throws IOException { public long getContainerCount(HddsProtos.LifeCycleState state) throws IOException { GetContainerCountRequestProto request = - GetContainerCountRequestProto.newBuilder().build(); + GetContainerCountRequestProto.newBuilder() + .setState(state) + .build(); GetContainerCountResponseProto response = - submitRequest(Type.GetClosedContainerCount, + submitRequest(Type.GetContainerCount, builder -> builder.setGetContainerCountRequest(request)) .getGetContainerCountResponse(); return response.getContainerCount(); diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index 933bb4a00870..d33e949c01a7 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -470,6 +470,7 @@ message GetPipelineResponseProto { } message GetContainerCountRequestProto { + optional LifeCycleState state = 1; } message GetContainerCountResponseProto { diff --git a/hadoop-hdds/interface-admin/src/main/resources/proto.lock b/hadoop-hdds/interface-admin/src/main/resources/proto.lock index 81af08d2ca99..9c3866826312 100644 --- a/hadoop-hdds/interface-admin/src/main/resources/proto.lock +++ 
b/hadoop-hdds/interface-admin/src/main/resources/proto.lock @@ -1544,7 +1544,15 @@ ] }, { - "name": "GetContainerCountRequestProto" + "name": "GetContainerCountRequestProto", + "fields": [ + { + "id": 1, + "name": "state", + "type": "LifeCycleState", + "optional": true + } + ] }, { "name": "GetContainerCountResponseProto", @@ -2358,4 +2366,4 @@ } } ] -} \ No newline at end of file +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java index 7b50dfceb7bb..c014f72a54f4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java @@ -239,7 +239,8 @@ private void initialize() throws IOException { final ContainerInfo container = iterator.next(); Objects.requireNonNull(container, "container == null"); containers.addContainer(container); - if (container.getState() == LifeCycleState.OPEN) { + if (container.getState() == LifeCycleState.OPEN + && container.getPipelineID() != null) { try { pipelineManager.addContainerToPipelineSCMStart( container.getPipelineID(), container.containerID()); @@ -260,8 +261,12 @@ private void initialize() throws IOException { getContainerStateChangeActions() { final Map> actions = new EnumMap<>(LifeCycleEvent.class); - actions.put(FINALIZE, info -> pipelineManager - .removeContainerFromPipeline(info.getPipelineID(), info.containerID())); + actions.put(FINALIZE, info -> { + if (info.getPipelineID() != null) { + pipelineManager.removeContainerFromPipeline( + info.getPipelineID(), info.containerID()); + } + }); return actions; } @@ -334,12 +339,16 @@ public void addContainer(final ContainerInfoProto containerInfo) transactionBuffer.addToBuffer(containerStore, containerID, container); 
containers.addContainer(container); - if (pipelineManager.containsPipeline(pipelineID)) { + if (pipelineID != null && pipelineManager.containsPipeline(pipelineID)) { pipelineManager.addContainerToPipeline(pipelineID, containerID); } else if (containerInfo.getState(). equals(LifeCycleState.OPEN)) { - // Pipeline should exist, but not - throw new PipelineNotFoundException(); + if (pipelineID != null) { + // OPEN containers normally require a live pipeline reference. + throw new PipelineNotFoundException(); + } + LOG.warn("Adding OPEN container {} without pipeline tracking " + + "because its pipeline ID is null.", containerID); } //recon may receive report of closed container, // no corresponding Pipeline can be synced for scm. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 73bf92e9cd58..00a9b6b3a0ce 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -1350,9 +1350,12 @@ public DatanodeUsageInfoResponseProto getDatanodeUsageInfo( public GetContainerCountResponseProto getContainerCount( StorageContainerLocationProtocolProtos.GetContainerCountRequestProto request) throws IOException { + long containerCount = request.hasState() + ? 
impl.getContainerCount(request.getState()) + : impl.getContainerCount(); return GetContainerCountResponseProto.newBuilder() - .setContainerCount(impl.getContainerCount()) + .setContainerCount(containerCount) .build(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 8f0c79728ed5..2cb6bb2f75db 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -415,8 +415,21 @@ public List getExistContainerWithPipelinesInBatch( ContainerWithPipeline cp = getContainerWithPipelineCommon(containerID); cpList.add(cp); } catch (IOException ex) { - //not found , just go ahead - LOG.error("Container with common pipeline not found: {}", ex); + // Pipeline lookup failed (e.g., QUASI_CLOSED container whose pipeline + // has already been cleaned up). Return the container metadata without a + // pipeline so that callers (e.g., Recon's sync) can still record the + // container rather than losing it silently. + LOG.warn("Pipeline lookup failed for container {}; returning container " + + "without pipeline. Cause: {}", containerID, ex.getMessage()); + try { + ContainerInfo info = scm.getContainerManager() + .getContainer(ContainerID.valueOf(containerID)); + cpList.add(new ContainerWithPipeline(info, null)); + } catch (ContainerNotFoundException notFound) { + // Container truly does not exist in SCM — exclude it from the result. 
+ LOG.error("Container {} not found in SCM and will not be returned " + + "to caller.", containerID, notFound); + } } } return cpList; diff --git a/hadoop-ozone/dist/src/main/compose/ozone/docker-config b/hadoop-ozone/dist/src/main/compose/ozone/docker-config index ecca3a971c61..f43ea2c03164 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone/docker-config @@ -23,7 +23,7 @@ CORE-SITE.XML_hadoop.proxyuser.hadoop.groups=* OZONE-SITE.XML_ozone.om.address=om OZONE-SITE.XML_ozone.om.http-address=om:9874 OZONE-SITE.XML_ozone.scm.http-address=scm:9876 -OZONE-SITE.XML_ozone.scm.container.size=1GB +OZONE-SITE.XML_ozone.scm.container.size=100MB OZONE-SITE.XML_ozone.scm.block.size=1MB OZONE-SITE.XML_ozone.scm.datanode.ratis.volume.free-space.min=10MB OZONE-SITE.XML_ozone.scm.pipeline.creation.interval=30s @@ -43,6 +43,16 @@ OZONE-SITE.XML_ozone.recon.http-address=0.0.0.0:9888 OZONE-SITE.XML_ozone.recon.https-address=0.0.0.0:9889 OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m OZONE-SITE.XML_ozone.recon.om.snapshot.task.initial.delay=20s +OZONE-SITE.XML_ozone.recon.scm.container.sync.task.initial.delay=30s +OZONE-SITE.XML_ozone.recon.scm.container.sync.task.interval.delay=2m +OZONE-SITE.XML_ozone.recon.scm.snapshot.task.initial.delay=20s +OZONE-SITE.XML_ozone.recon.scm.snapshot.task.interval.delay=30m +OZONE-SITE.XML_ozone.recon.scm.container.threshold=20 +OZONE-SITE.XML_ozone.recon.scm.per.state.drift.threshold=1 +OZONE-SITE.XML_ozone.recon.scm.deleted.container.check.batch.size=50 +OZONE-SITE.XML_hdds.heartbeat.recon.interval=5m +OZONE-SITE.XML_hdds.container.report.interval=1h +OZONE-SITE.XML_hdds.pipeline.report.interval=5m OZONE-SITE.XML_ozone.datanode.pipeline.limit=1 OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE-SITE.XML_hdds.container.report.interval=60s @@ -51,8 +61,8 @@ OZONE-SITE.XML_ozone.scm.dead.node.interval=45s OZONE-SITE.XML_hdds.heartbeat.interval=5s 
OZONE-SITE.XML_ozone.scm.close.container.wait.duration=5s OZONE-SITE.XML_hdds.scm.replication.thread.interval=15s -OZONE-SITE.XML_hdds.scm.replication.under.replicated.interval=5s -OZONE-SITE.XML_hdds.scm.replication.over.replicated.interval=5s +OZONE-SITE.XML_hdds.scm.replication.under.replicated.interval=10s +OZONE-SITE.XML_hdds.scm.replication.over.replicated.interval=2m OZONE-SITE.XML_hdds.scm.wait.time.after.safemode.exit=30s OZONE-SITE.XML_ozone.http.basedir=/tmp/ozone_http diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerHealthSummaryEndToEnd.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerHealthSummaryEndToEnd.java new file mode 100644 index 000000000000..145b0d5ec1b2 --- /dev/null +++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerHealthSummaryEndToEnd.java @@ -0,0 +1,1292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.recon; + +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType.KeyValueContainer; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INITIAL_DELAY; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; +import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.XceiverClientSpi; +import org.apache.hadoop.hdds.scm.container.ContainerHealthState; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerManager; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; +import 
org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.ozone.HddsDatanodeService; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.UniformDatanodesFactory; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; +import org.apache.hadoop.ozone.recon.persistence.ContainerHealthSchemaManager; +import org.apache.hadoop.ozone.recon.persistence.ContainerHealthSchemaManager.UnhealthyContainerRecord; +import org.apache.hadoop.ozone.recon.scm.ReconContainerManager; +import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; +import org.apache.hadoop.ozone.recon.tasks.ReconTaskConfig; +import org.apache.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates; +import org.apache.ozone.test.LambdaTestUtils; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Comprehensive end-to-end integration test validating that: + *
    + *
  1. Container State Summary — per lifecycle-state counts (OPEN, CLOSING, + * QUASI_CLOSED, CLOSED) are identical between SCM and Recon after a full sync.
  2. + *
  3. Container Health Summary — UNHEALTHY_CONTAINERS derby table counts in + * Recon match exactly the health states classified by SCM's ReplicationManager + * after both process the same container replica state.
  4. + *
+ * + *

Health states covered: + *

    + *
  • {@code UNDER_REPLICATED} — RF3 CLOSED container with 1 replica removed from + * both SCM and Recon → 2 of 3 replicas present.
  • + *
  • {@code OVER_REPLICATED} — RF1 CLOSED container with a phantom replica injected + * into both SCM and Recon → 2 replicas for an RF1 container.
  • + *
  • {@code MISSING} — RF1 CLOSED container with all replicas removed from both, + * {@code numberOfKeys=1} → SCM RM: {@code MISSING} (via + * {@code RatisReplicationCheckHandler}), Recon: {@code MISSING}.
  • + *
  • {@code EMPTY_MISSING} — RF1 CLOSING container with all replicas removed + * from both, {@code numberOfKeys=0} (default). SCM RM emits both: + * {@code getStat(MISSING)} (via {@code ClosingContainerHandler}) for these containers + * AND {@code getStat(EMPTY)} (via {@code EmptyContainerHandler} case 3) for the + * CLOSED contrast group below. When the same container is both MISSING + * (no replicas → health=MISSING in SCM) and EMPTY (no keys → numberOfKeys=0), + * Recon stores it as {@code EMPTY_MISSING}.
  • + *
  • {@code EMPTY} (contrast to {@code EMPTY_MISSING}) — RF1 CLOSED container + * with 0 replicas and {@code numberOfKeys=0}, never created on any datanode. + * SCM RM: {@code EMPTY} (via {@code EmptyContainerHandler} case 3, which fires + * before {@code RatisReplicationCheckHandler} and stops the chain). + * Recon: also {@code EMPTY} — NOT stored in {@code UNHEALTHY_CONTAINERS}. This + * shows that the same content properties (0 keys + 0 replicas) produce a different + * classification depending on lifecycle state: CLOSING → MISSING/EMPTY_MISSING, + * CLOSED → EMPTY/not-stored.
  • + *
  • {@code MIS_REPLICATED} — NOT COVERED: requires a rack-aware placement policy + * configured with a specific multi-rack DN topology, not available in mini-cluster + * integration tests. Expected count = 0 in both SCM and Recon.
  • + *
+ * + *

Key design notes on EMPTY, MISSING, and EMPTY_MISSING: + *

    + *
  • A container is stored as {@code EMPTY_MISSING} in Recon when it is + * classified as {@code MISSING} by SCM's RM (no replicas → health=MISSING) + * AND the container is empty (no OM-tracked keys → numberOfKeys=0). + * SCM's RM emits {@code getStat(MISSING)} for such containers, while Recon + * refines this to {@code EMPTY_MISSING} in {@code handleMissingContainer()}. + *
  • + *
  • MISSING path: CLOSED + 0 replicas + {@code numberOfKeys > 0} → + * {@code EmptyContainerHandler} case 3 does NOT fire (numberOfKeys≠0) → + * {@code RatisReplicationCheckHandler} fires → SCM: {@code MISSING}, + * Recon: {@code MISSING}.
  • + *
  • EMPTY_MISSING path: CLOSING + 0 replicas + {@code numberOfKeys == 0} → + * {@code ClosingContainerHandler} fires → SCM: {@code MISSING} (getStat(MISSING)++), + * Recon: {@code EMPTY_MISSING}. The container is simultaneously MISSING (no replicas, + * health=MISSING) and EMPTY (no keys, numberOfKeys=0).
  • + *
  • EMPTY (not EMPTY_MISSING) path: CLOSED + 0 replicas + + * {@code numberOfKeys == 0} → {@code EmptyContainerHandler} case 3 fires + * first (CLOSED state, before {@code RatisReplicationCheckHandler}) → + * SCM: {@code EMPTY} (getStat(EMPTY)++). Even though this container also has 0 + * replicas, the chain stops at EMPTY and never reaches MISSING classification. + * Recon also classifies it as EMPTY and does NOT store it in + * {@code UNHEALTHY_CONTAINERS}. This is the critical boundary.
  • + *
+ */ +public class TestReconContainerHealthSummaryEndToEnd { + + private static final Logger LOG = + LoggerFactory.getLogger(TestReconContainerHealthSummaryEndToEnd.class); + + // Timeouts + private static final int PIPELINE_READY_TIMEOUT_MS = 30_000; + private static final int POLL_INTERVAL_MS = 500; + // Upper bound for waiting on replica ICRs to propagate after container creation. + // RF3 Ratis containers require all 3 DataNodes to commit via Ratis consensus and + // then each DN sends a separate ICR to Recon. In slower CI environments this can + // take longer than a simple RF1 allocation; 60 seconds gives enough headroom. + private static final int REPLICA_SYNC_TIMEOUT_MS = 60_000; + + // Upper bound for UNHEALTHY_CONTAINERS query pagination (no paging needed for tests) + private static final int MAX_RESULT = 100_000; + + private MiniOzoneCluster cluster; + private OzoneConfiguration conf; + private ReconService recon; + + @BeforeEach + public void init() throws Exception { + conf = new OzoneConfiguration(); + // Use a 10-minute full container report (FCR) interval so that datanodes do + // NOT send periodic full reports during the test (<3 min). Incremental + // container reports (ICRs) are still sent immediately on container creation, + // which is what we rely on to populate replica state. The long FCR window + // prevents a removed replica from being re-added by a background DN report + // before processAll() runs. + conf.set(HDDS_CONTAINER_REPORT_INTERVAL, "10m"); + conf.set(HDDS_PIPELINE_REPORT_INTERVAL, "1s"); + + // Delay Recon's background SCM-sync schedulers well beyond any test duration + // so they cannot interfere with the test's manual syncWithSCMContainerInfo() + // calls. 
Without this, the snapshot scheduler fires at ~1 minute (its default + // initial delay), acquires the isSyncDataFromSCMRunning flag, and — before the + // flag-leak fix — never releases it, causing all subsequent + // syncWithSCMContainerInfo() calls to silently return false, leaving + // containers absent from Recon and causing ContainerNotFoundException. + conf.set(OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY, "1h"); + conf.set(OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INITIAL_DELAY, "1h"); + + ReconTaskConfig taskConfig = conf.getObject(ReconTaskConfig.class); + taskConfig.setMissingContainerTaskInterval(Duration.ofSeconds(2)); + conf.setFromObject(taskConfig); + + // Keep SCM's remediation processors idle during tests so injected unhealthy + // states are not healed before assertions run. 5 minutes is well beyond any + // test's duration. + conf.set("hdds.scm.replication.under.replicated.interval", "5m"); + conf.set("hdds.scm.replication.over.replicated.interval", "5m"); + + recon = new ReconService(conf); + cluster = MiniOzoneCluster.newBuilder(conf) + .setNumDatanodes(3) + .setDatanodeFactory(UniformDatanodesFactory.newBuilder().build()) + .addService(recon) + .build(); + cluster.waitForClusterToBeReady(); + cluster.waitForPipelineTobeReady(ONE, PIPELINE_READY_TIMEOUT_MS); + cluster.waitForPipelineTobeReady( + HddsProtos.ReplicationFactor.THREE, PIPELINE_READY_TIMEOUT_MS); + + // Wait until Recon's pipeline manager has synced from SCM so RF3 containers + // can be allocated and reach Recon's replica bookkeeping. 
+ ReconStorageContainerManagerFacade reconScm = getReconScm(); + LambdaTestUtils.await(PIPELINE_READY_TIMEOUT_MS, POLL_INTERVAL_MS, + () -> !reconScm.getPipelineManager().getPipelines().isEmpty()); + } + + @AfterEach + public void shutdown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + // --------------------------------------------------------------------------- + // Test 1 — Container State Summary + // --------------------------------------------------------------------------- + + /** + * Validates that per lifecycle-state container counts match exactly between + * SCM and Recon for all four induciable lifecycle states. + * + *

After allocating containers in SCM and transitioning them to OPEN, + * CLOSING, QUASI_CLOSED and CLOSED states, a full + * {@code syncWithSCMContainerInfo()} is executed. The test then asserts: + *

+   *   scmCm.getContainers(state).size() == reconCm.getContainers(state).size()
+   * 
+ * for every {@link HddsProtos.LifeCycleState} value. + * + *

Note on DELETING and DELETED: transitioning to these states requires + * additional SCM-internal bookkeeping (block deletion flows) that goes + * beyond direct ContainerManager API calls. These states are not induced + * here but their expected count (0) is still validated. + */ + @Test + public void testContainerStateSummaryMatchesBetweenSCMAndRecon() + throws Exception { + StorageContainerManager scm = cluster.getStorageContainerManager(); + ContainerManager scmCm = scm.getContainerManager(); + ReconStorageContainerManagerFacade reconScm = getReconScm(); + ReconContainerManager reconCm = + (ReconContainerManager) reconScm.getContainerManager(); + + // Allocate all containers as OPEN in SCM first. syncWithSCMContainerInfo() + // (Pass 2) adds OPEN containers from SCM to Recon. We then transition each + // group to its target state in BOTH SCM and Recon so the counts always match. + // + // CLOSING containers must follow this allocate-then-sync-then-FINALIZE pattern + // because the four-pass sync does NOT cover the CLOSING lifecycle state — it + // only syncs OPEN, CLOSED, and QUASI_CLOSED containers. + + // OPEN — 3 RF1 containers; no state transition needed. + List openIds = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + openIds.add(scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test").containerID()); + } + + // Allocate CLOSING, QUASI_CLOSED, and CLOSED candidates as OPEN in SCM. 
+ List closingIds = new ArrayList<>(); + List quasiClosedIds = new ArrayList<>(); + List closedIds = new ArrayList<>(); + + for (int i = 0; i < 3; i++) { + closingIds.add(scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test").containerID()); + } + for (int i = 0; i < 3; i++) { + quasiClosedIds.add(scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test").containerID()); + } + for (int i = 0; i < 3; i++) { + closedIds.add(scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test").containerID()); + } + + // Sync Recon: Pass 2 adds all OPEN containers (all 12 allocated above) to Recon. + // After this sync every container is in OPEN state in both SCM and Recon. + syncAndWaitForReconContainers(reconScm, reconCm, + combineContainerIds(openIds, closingIds, quasiClosedIds, closedIds)); + + // Transition each group to its target state in BOTH SCM and Recon simultaneously. + // CLOSING — FINALIZE: OPEN → CLOSING. + for (ContainerID cid : closingIds) { + scmCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + } + // QUASI_CLOSED — FINALIZE then QUASI_CLOSE. + for (ContainerID cid : quasiClosedIds) { + scmCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + scmCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.QUASI_CLOSE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.QUASI_CLOSE); + } + // CLOSED — FINALIZE then CLOSE. + for (ContainerID cid : closedIds) { + scmCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + scmCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.CLOSE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.CLOSE); + } + + // Assert per-state counts match between SCM and Recon for every state. 
+ logStateSummaryHeader(); + Map mismatches = + validateAndLogStateSummary(scmCm, reconCm); + + assertTrue(mismatches.isEmpty(), + "Container State Summary counts diverge between SCM and Recon for states: " + + mismatches); + } + + // --------------------------------------------------------------------------- + // Test 2 — Container Health Summary + // --------------------------------------------------------------------------- + + /** + * Validates that Container Health Summary counts match exactly between SCM's + * {@link ReplicationManagerReport} and Recon's UNHEALTHY_CONTAINERS derby + * table after both process the same injected container states. + * + *

The test also explicitly validates the lifecycle-state boundary that + * determines when Recon emits {@code EMPTY_MISSING}: a container is stored + * as {@code EMPTY_MISSING} when SCM's RM emits {@code getStat(MISSING)} + * for it (no replicas → health=MISSING) AND the container has no keys + * (numberOfKeys=0, the "EMPTY" property). The contrast group ({@code EMPTY_ONLY}) + * shows that CLOSED containers with the same 0-key+0-replica content are + * classified as {@code EMPTY} by SCM — not {@code MISSING} — and are NOT + * stored in Recon's {@code UNHEALTHY_CONTAINERS}. + * + *

Setup per health state: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
StateRFLifecycleReplicaskeysExpected in SCM (getStat)Expected in Recon
UNDER_REPLICATEDRF3CLOSED20UNDER_REPLICATED=2UNDER_REPLICATED (count=2)
OVER_REPLICATEDRF1CLOSED2 (phantom)0OVER_REPLICATED=2OVER_REPLICATED (count=2)
MISSINGRF1CLOSED01MISSING=2MISSING (count=2)
EMPTY_MISSINGRF1CLOSING00MISSING=+2 (same stat as MISSING; total MISSING = missingIds+emptyMissingIds)EMPTY_MISSING (count=2)
EMPTY (contrast)RF1CLOSED00EMPTY=2 (EmptyContainerHandler case 3 fires, NOT MISSING)NOT stored (EMPTY not mapped to UNHEALTHY_CONTAINERS)
MIS_REPLICATEDN/AN/AN/AN/A00
+ */ + @Test + public void testContainerHealthSummaryMatchesBetweenSCMAndRecon() + throws Exception { + StorageContainerManager scm = cluster.getStorageContainerManager(); + ContainerManager scmCm = scm.getContainerManager(); + ReconStorageContainerManagerFacade reconScm = getReconScm(); + ReconContainerManager reconCm = + (ReconContainerManager) reconScm.getContainerManager(); + HealthSummarySetup setup = + setupHealthSummaryScenario(scmCm, reconScm, reconCm, 2); + + // Run SCM RM (updates ContainerInfo.healthState on every container in SCM). + // Remediation intervals are 5m so no commands will be dispatched to DNs. + scm.getReplicationManager().processAll(); + ReplicationManagerReport scmReport = + scm.getReplicationManager().getContainerReport(); + + // Run Recon RM (writes to UNHEALTHY_CONTAINERS derby table). + reconScm.getReplicationManager().processAll(); + ReconHealthRecords records = loadReconHealthRecords(reconCm); + + // Log Container Health Summary in the user-facing format. + logHealthSummary(scmReport, records.underRep, records.overRep, + records.missing, records.emptyMissing, records.misRep); + assertHealthSummaryMatches(scmCm, scmReport, setup, records); + } + + // --------------------------------------------------------------------------- + // Test 3 — Comprehensive Summary Report (State Summary + Health Summary) + // --------------------------------------------------------------------------- + + /** + * Comprehensive end-to-end test that validates both Container State Summary + * and Container Health Summary in a single scenario. After setup and both + * RM runs, logs a formatted report matching the Container Summary Report + * output format requested by the user. + * + *

Expected output pattern: + *

+   * Container Summary Report
+   * ==========================================================
+   *
+   * Container State Summary (SCM vs Recon — counts must match)
+   * =======================
+   * OPEN:         SCM=N, Recon=N
+   * CLOSING:      SCM=N, Recon=N
+   * QUASI_CLOSED: SCM=N, Recon=N
+   * CLOSED:       SCM=N, Recon=N
+   * DELETING:     SCM=0, Recon=0
+   * DELETED:      SCM=0, Recon=0
+   * RECOVERING:   SCM=0, Recon=0
+   *
+   * Container Health Summary (SCM RM Report vs Recon UNHEALTHY_CONTAINERS)
+   * ========================
+   * HEALTHY:             SCM=N  (not stored in UNHEALTHY_CONTAINERS)
+   * UNDER_REPLICATED:    SCM=N, Recon=N
+   * MIS_REPLICATED:      SCM=0, Recon=0  (not induced — rack-aware topology required)
+   * OVER_REPLICATED:     SCM=N, Recon=N
+   * MISSING:             SCM=N, Recon MISSING=N + EMPTY_MISSING=N
+   * ...
+   * </pre>
+ */ + @Test + public void testComprehensiveSummaryReport() throws Exception { + StorageContainerManager scm = cluster.getStorageContainerManager(); + ContainerManager scmCm = scm.getContainerManager(); + ReconStorageContainerManagerFacade reconScm = getReconScm(); + ReconContainerManager reconCm = + (ReconContainerManager) reconScm.getContainerManager(); + setupStateSummaryScenario(scmCm, reconScm, reconCm); + HealthSummarySetup setup = + setupHealthSummaryScenario(scmCm, reconScm, reconCm, 1); + + // Run both RMs. + scm.getReplicationManager().processAll(); + ReplicationManagerReport scmReport = + scm.getReplicationManager().getContainerReport(); + reconScm.getReplicationManager().processAll(); + ReconHealthRecords records = loadReconHealthRecords(reconCm); + logContainerSummaryReport(scmCm, reconCm, scmReport, records); + assertStateSummaryMatches(scmCm, reconCm); + assertHealthSummaryMatches(scmCm, scmReport, setup, records); + } + + private void setupStateSummaryScenario( + ContainerManager scmCm, + ReconStorageContainerManagerFacade reconScm, + ReconContainerManager reconCm) throws Exception { + List closingStateCandidates = new ArrayList<>(); + List quasiClosedStateCandidates = new ArrayList<>(); + for (int i = 0; i < 2; i++) { + closingStateCandidates.add(scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test").containerID()); + quasiClosedStateCandidates.add(scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test").containerID()); + } + syncAndWaitForReconContainers(reconScm, reconCm, + combineContainerIds(closingStateCandidates, quasiClosedStateCandidates)); + for (ContainerID cid : closingStateCandidates) { + scmCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + } + for (ContainerID cid : quasiClosedStateCandidates) { + scmCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + scmCm.updateContainerState(cid, 
HddsProtos.LifeCycleEvent.QUASI_CLOSE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.QUASI_CLOSE); + } + } + + private HealthSummarySetup setupHealthSummaryScenario( + ContainerManager scmCm, + ReconStorageContainerManagerFacade reconScm, + ReconContainerManager reconCm, + int count) throws Exception { + HealthSummarySetup setup = new HealthSummarySetup(); + setup.underReplicatedIds = + setupUnderReplicatedContainers(scmCm, reconScm, reconCm, count); + setup.overReplicatedIds = + setupOverReplicatedContainers(scmCm, reconScm, reconCm, count); + setup.missingIds = + setupMissingContainers(scmCm, reconScm, reconCm, count); + setup.emptyMissingIds = + setupEmptyMissingContainers(scmCm, reconScm, reconCm, count); + setup.emptyOnlyIds = setupEmptyOnlyContainers(scmCm, count); + syncAndWaitForReconContainers(reconScm, reconCm, setup.emptyOnlyIds.stream() + .map(ContainerID::valueOf) + .collect(Collectors.toList())); + return setup; + } + + // =========================================================================== + // Setup helpers + // =========================================================================== + + /** + * Creates RF3 CLOSED containers with exactly 2 of 3 required replicas injected + * synthetically into both SCM and Recon. Both RMs will classify these as + * {@code UNDER_REPLICATED}. + * + *

Containers are never created on actual datanodes — synthetic replicas are + * injected directly into the in-memory replica metadata. This avoids the race + * condition where the datanode (which holds the real container) re-reports its + * replica within the 1-second container-report interval, re-adding the removed + * replica before {@code processAll()} can classify the container as UNDER_REPLICATED. + * + *

+   * Classification path:
+   * <ol>
+   *   <li>Container is CLOSED (FINALIZE + CLOSE) with 2 synthetic replicas (keyCount=1).</li>
+   *   <li>{@code EmptyContainerHandler}: replicas not empty (keyCount=1) → does NOT fire.</li>
+   *   <li>{@code RatisReplicationCheckHandler}: 2 replicas for RF3 → {@code UNDER_REPLICATED}.</li>
+   * </ol>
+ */ + private List setupUnderReplicatedContainers( + ContainerManager scmCm, + ReconStorageContainerManagerFacade reconScm, + ReconContainerManager reconCm, + int count) throws Exception { + + List ids = new ArrayList<>(); + for (int i = 0; i < count; i++) { + ContainerInfo c = scmCm.allocateContainer( + RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE), + "test"); + createContainerOnPipeline(c); + long cid = c.getContainerID(); + ContainerID containerID = ContainerID.valueOf(cid); + ids.add(cid); + + syncAndWaitForReconContainers(reconScm, reconCm, + Arrays.asList(containerID)); + + // The explicit createContainerOnPipeline() above ensures the physical + // container exists on the RF3 pipeline, so both SCM and Recon should + // learn the initial 3 replicas via the normal create-time report path. + LambdaTestUtils.await(REPLICA_SYNC_TIMEOUT_MS, POLL_INTERVAL_MS, () -> { + try { + return scmCm.getContainerReplicas(containerID).size() >= 3 + && reconCm.getContainerReplicas(containerID).size() >= 3; + } catch (Exception e) { + return false; + } + }); + drainScmAndReconEventQueues(); + + // Transition the container to CLOSED in both SCM and Recon metadata. + // ContainerManagerImpl.updateContainerState() does NOT dispatch CLOSE + // commands to the DNs (those are dispatched by the ReplicationManager + // and CloseContainerEventHandler, both of which are idle during tests + // due to the 5m interval settings). Therefore no further ICRs are + // triggered by this metadata-only state change. + closeInBoth(scmCm, reconCm, containerID); + + // Remove exactly 1 physical replica from a real DN and let heartbeat / + // report processing update SCM and Recon through the normal path. 
+ ContainerReplica toRemove = scmCm.getContainerReplicas(containerID) + .iterator().next(); + deleteContainerReplica(cluster, toRemove.getDatanodeDetails(), cid); + LambdaTestUtils.await(REPLICA_SYNC_TIMEOUT_MS, POLL_INTERVAL_MS, () -> { + try { + return scmCm.getContainerReplicas(containerID).size() == 2 + && reconCm.getContainerReplicas(containerID).size() == 2; + } catch (Exception e) { + return false; + } + }); + } + return ids; + } + + /** + * Creates RF1 CLOSED containers with 2 replicas in both SCM and Recon: + * 1 real replica (registered via ICR when the DN creates the container) plus + * 1 phantom replica injected on a different DN. + * Both RMs will classify these as {@code OVER_REPLICATED} + * (2 replicas for an RF1 container that expects only 1). + * + *

+   * Classification path:
+   * <ol>
+   *   <li>Container is RF1, CLOSED. 1 DN has the container (real replica).
+   *       A phantom replica is injected for a second DN that never had it.</li>
+   *   <li>{@code EmptyContainerHandler}: replicas not empty → does NOT fire.</li>
+   *   <li>{@code RatisReplicationCheckHandler}: 2 replicas for RF1 →
+   *       {@code OVER_REPLICATED}.</li>
+   * </ol>
+ */ + private List setupOverReplicatedContainers( + ContainerManager scmCm, + ReconStorageContainerManagerFacade reconScm, + ReconContainerManager reconCm, + int count) throws Exception { + + List allDatanodes = cluster.getHddsDatanodes().stream() + .map(HddsDatanodeService::getDatanodeDetails) + .collect(Collectors.toList()); + + List ids = new ArrayList<>(); + for (int i = 0; i < count; i++) { + ContainerInfo c = scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test"); + createContainerOnPipeline(c); + long cid = c.getContainerID(); + ContainerID containerID = ContainerID.valueOf(cid); + ids.add(cid); + + syncAndWaitForReconContainers(reconScm, reconCm, + Arrays.asList(containerID)); + + LambdaTestUtils.await(REPLICA_SYNC_TIMEOUT_MS, POLL_INTERVAL_MS, () -> { + try { + return !scmCm.getContainerReplicas(containerID).isEmpty() + && !reconCm.getContainerReplicas(containerID).isEmpty(); + } catch (Exception e) { + return false; + } + }); + drainScmAndReconEventQueues(); + + // Transition to CLOSED in both SCM and Recon metadata (no CLOSE command + // dispatched to the DN; see UNDER_REPLICATED setup for the full rationale). + closeInBoth(scmCm, reconCm, containerID); + + // Inject a phantom replica on a DN that does NOT already hold the container. + // That DN will never send an ICR for this container (it doesn't have it), + // so the phantom persists for the duration of the test. + // With 10m FCR, the real DN won't send a full report that changes replica counts. + // Result: 2 replicas for RF1 → OVER_REPLICATED. 
+ Set existingUuids = scmCm.getContainerReplicas(containerID) + .stream() + .map(r -> r.getDatanodeDetails().getUuid()) + .collect(Collectors.toSet()); + DatanodeDetails phantomDN = allDatanodes.stream() + .filter(d -> !existingUuids.contains(d.getUuid())) + .findFirst() + .orElseThrow(() -> new AssertionError( + "No spare DN available to inject phantom replica for " + containerID)); + + ContainerReplica phantom = ContainerReplica.newBuilder() + .setContainerID(containerID) + .setContainerState(ContainerReplicaProto.State.CLOSED) + .setDatanodeDetails(phantomDN) + .setKeyCount(1) + .setBytesUsed(100) + .setSequenceId(1) + .build(); + scmCm.updateContainerReplica(containerID, phantom); + reconCm.updateContainerReplica(containerID, phantom); + } + return ids; + } + + /** + * Creates RF1 CLOSED containers with 0 replicas and {@code numberOfKeys=1}. + * Both SCM RM and Recon classify these as {@code MISSING}. + * + *

Containers are never created on actual datanodes, eliminating any + * datanode-report race condition where a re-reporting datanode re-adds the + * replica before {@code processAll()} runs. + * + *

+   * Classification path:
+   * <ol>
+   *   <li>Container is CLOSED (FINALIZE + CLOSE) with 0 replicas and numberOfKeys=1.</li>
+   *   <li>{@code EmptyContainerHandler} case 3 requires {@code numberOfKeys == 0} →
+   *       does NOT fire (numberOfKeys=1).</li>
+   *   <li>{@code RatisReplicationCheckHandler}: 0 replicas for RF1 → {@code MISSING}.</li>
+   *   <li>Recon {@code handleMissingContainer()}: {@code numberOfKeys=1 > 0} →
+   *       stored as {@code MISSING} (not EMPTY_MISSING).</li>
+   * </ol>
+ */ + /** + * Creates RF1 CLOSED containers with 0 replicas and {@code numberOfKeys=1}. + * Both SCM RM and Recon classify these as {@code MISSING}. + * + *

+   * Classification path:
+   * <ol>
+   *   <li>Container is RF1, CLOSED, numberOfKeys=1, 0 replicas.</li>
+   *   <li>{@code EmptyContainerHandler} case 3 requires {@code numberOfKeys == 0}
+   *       → does NOT fire (numberOfKeys=1).</li>
+   *   <li>{@code RatisReplicationCheckHandler}: 0 replicas for RF1 →
+   *       {@code MISSING}.</li>
+   *   <li>Recon {@code handleMissingContainer()}: {@code numberOfKeys=1 > 0}
+   *       → stored as {@code MISSING} (not EMPTY_MISSING).</li>
+   * </ol>
+ */ + private List setupMissingContainers( + ContainerManager scmCm, + ReconStorageContainerManagerFacade reconScm, + ReconContainerManager reconCm, + int count) throws Exception { + + List ids = new ArrayList<>(); + for (int i = 0; i < count; i++) { + ContainerInfo c = scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test"); + createContainerOnPipeline(c); + long cid = c.getContainerID(); + ContainerID containerID = ContainerID.valueOf(cid); + ids.add(cid); + + syncAndWaitForReconContainers(reconScm, reconCm, + Arrays.asList(containerID)); + + LambdaTestUtils.await(REPLICA_SYNC_TIMEOUT_MS, POLL_INTERVAL_MS, () -> { + try { + return !scmCm.getContainerReplicas(containerID).isEmpty() + && !reconCm.getContainerReplicas(containerID).isEmpty(); + } catch (Exception e) { + return false; + } + }); + drainScmAndReconEventQueues(); + + // Transition to CLOSED in both SCM and Recon metadata. + closeInBoth(scmCm, reconCm, containerID); + + // Set numberOfKeys=1 so EmptyContainerHandler case 3 + // (CLOSED + 0 keys + 0 replicas → EMPTY) does NOT fire. + scmCm.getContainer(containerID).setNumberOfKeys(1); + reconCm.getContainer(containerID).setNumberOfKeys(1); + + // Remove the single physical replica and wait for SCM / Recon to observe + // the absence through the normal report path. + ContainerReplica toRemove = scmCm.getContainerReplicas(containerID) + .iterator().next(); + deleteContainerReplica(cluster, toRemove.getDatanodeDetails(), cid); + LambdaTestUtils.await(REPLICA_SYNC_TIMEOUT_MS, POLL_INTERVAL_MS, () -> { + try { + return scmCm.getContainerReplicas(containerID).isEmpty() + && reconCm.getContainerReplicas(containerID).isEmpty(); + } catch (Exception e) { + return false; + } + }); + } + return ids; + } + + /** + * Creates RF1 CLOSING containers with 0 replicas and {@code numberOfKeys=0}. + * SCM RM classifies these as {@code MISSING}; Recon stores them as {@code EMPTY_MISSING}. + * + *

Containers are first allocated as OPEN in SCM, synced to Recon as OPEN + * (Pass 2), then FINALIZED in both SCM and Recon simultaneously. This ensures + * the CLOSING state is present in both systems without requiring datanode creation + * (which would introduce datanode-report race conditions). + * + *

+   * Classification path (the correct path for EMPTY_MISSING):
+   * <ol>
+   *   <li>Container is in CLOSING state (FINALIZE only, NOT CLOSE) with 0 replicas
+   *       and numberOfKeys=0.</li>
+   *   <li>{@code ClosingContainerHandler}: CLOSING state + 0 replicas →
+   *       {@code report.incrementAndSample(MISSING)} → {@code MISSING} health state,
+   *       chain stops.</li>
+   *   <li>Recon {@code handleMissingContainer()}: {@code numberOfKeys=0} →
+   *       {@code isEmptyMissing() = true} → stored as {@code EMPTY_MISSING}.</li>
+   * </ol>
+ * + *

Why CLOSING (not CLOSED) is required: + * For a CLOSED container with {@code numberOfKeys=0} and 0 replicas, + * {@code EmptyContainerHandler} case 3 fires first and classifies the container as + * {@code EMPTY} — stopping the chain. Using CLOSING state bypasses this because + * {@code EmptyContainerHandler} only handles CLOSED and QUASI_CLOSED containers. + */ + private List setupEmptyMissingContainers( + ContainerManager scmCm, + ReconStorageContainerManagerFacade reconScm, + ReconContainerManager reconCm, + int count) throws Exception { + + List ids = new ArrayList<>(); + for (int i = 0; i < count; i++) { + ContainerInfo c = scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test"); + ids.add(c.getContainerID()); + } + + // Sync adds OPEN containers from SCM to Recon (Pass 2). After this sync + // every container exists in both SCM and Recon in OPEN state. + syncAndWaitForReconContainers(reconScm, reconCm, ids.stream() + .map(ContainerID::valueOf) + .collect(Collectors.toList())); + + for (long cid : ids) { + ContainerID containerID = ContainerID.valueOf(cid); + + // Transition OPEN → CLOSING in BOTH SCM and Recon simultaneously. + // numberOfKeys stays 0 (default). 0 replicas (never on any datanode). + scmCm.updateContainerState(containerID, HddsProtos.LifeCycleEvent.FINALIZE); + reconCm.updateContainerState(containerID, HddsProtos.LifeCycleEvent.FINALIZE); + } + return ids; + } + + /** + * Creates RF1 CLOSED containers with 0 replicas and {@code numberOfKeys=0}, + * never created on any datanode. Serves as the contrast group to + * {@code setupEmptyMissingContainers}: same content properties (0 keys + 0 replicas) + * but CLOSED lifecycle state instead of CLOSING. + * + *

+   * Classification path:
+   * <ol>
+   *   <li>Container is CLOSED (FINALIZE + CLOSE) with 0 replicas and numberOfKeys=0
+   *       (default). The container was never created on any datanode.</li>
+   *   <li>{@code EmptyContainerHandler} case 3: CLOSED + numberOfKeys==0 +
+   *       replicas.isEmpty() → {@code report.incrementAndSample(EMPTY)} →
+   *       {@code containerInfo.setHealthState(EMPTY)}, chain stops.</li>
+   *   <li>The container WOULD be MISSING (0 replicas for RF1) if not for
+   *       {@code EmptyContainerHandler} case 3 firing first for CLOSED containers.</li>
+   *   <li>Recon: also classifies as EMPTY → {@code storeHealthStatesToDatabase()} skips
+   *       EMPTY (not mapped to any {@code UnHealthyContainerStates}) → NOT stored in
+   *       Recon's {@code UNHEALTHY_CONTAINERS} table.</li>
+   * </ol>
+ * + *

After calling this method, the caller must invoke + * {@code reconScm.syncWithSCMContainerInfo()} to make these containers visible to + * Recon's container manager (Pass 1 of the sync discovers CLOSED containers in SCM + * that are absent from Recon and adds them with their current replica set, which is + * empty for these containers). + */ + private List setupEmptyOnlyContainers( + ContainerManager scmCm, + int count) throws Exception { + + List ids = new ArrayList<>(); + for (int i = 0; i < count; i++) { + ContainerInfo c = scmCm.allocateContainer( + RatisReplicationConfig.getInstance(ONE), "test"); + long cid = c.getContainerID(); + ContainerID containerID = ContainerID.valueOf(cid); + + // Transition to CLOSED immediately without creating the container on any datanode. + // The result is a CLOSED container with 0 replicas and numberOfKeys=0. + scmCm.updateContainerState(containerID, HddsProtos.LifeCycleEvent.FINALIZE); + scmCm.updateContainerState(containerID, HddsProtos.LifeCycleEvent.CLOSE); + + ids.add(cid); + } + return ids; + } + + // =========================================================================== + // Assertion helpers + // =========================================================================== + + private void assertStateSummaryMatches( + ContainerManager scmCm, + ReconContainerManager reconCm) { + logStateSummaryHeader(); + Map stateMismatches = + validateAndLogStateSummary(scmCm, reconCm); + assertTrue(stateMismatches.isEmpty(), + "Container State Summary counts diverge between SCM and Recon: " + + stateMismatches); + } + + private void assertHealthSummaryMatches( + ContainerManager scmCm, + ReplicationManagerReport scmReport, + HealthSummarySetup setup, + ReconHealthRecords records) throws Exception { + assertStateMatch(scmCm, setup.underReplicatedIds, records.underRep, + ContainerHealthState.UNDER_REPLICATED, "UNDER_REPLICATED", + "UNDER_REPLICATED count must match between SCM RM report and Recon " + + "UNHEALTHY_CONTAINERS"); + 
assertStateMatch(scmCm, setup.overReplicatedIds, records.overRep, + ContainerHealthState.OVER_REPLICATED, "OVER_REPLICATED", + "OVER_REPLICATED count must match between SCM RM report and Recon " + + "UNHEALTHY_CONTAINERS"); + assertStateMatch(scmCm, setup.missingIds, records.missing, + ContainerHealthState.MISSING, "MISSING", + "MISSING count must match between SCM RM report and Recon " + + "UNHEALTHY_CONTAINERS"); + + assertAllClassifiedBySCM(scmCm, setup.emptyOnlyIds, ContainerHealthState.EMPTY, + "EMPTY"); + assertNoneInRecon(records.emptyMissing, setup.emptyOnlyIds, + "CLOSED containers with 0 keys and 0 replicas must NOT be stored as " + + "EMPTY_MISSING"); + assertEquals(setup.emptyOnlyIds.size(), + countMatchingHealthState(scmCm, setup.emptyOnlyIds, ContainerHealthState.EMPTY), + "SCM must classify every CLOSED + 0-key + 0-replica emptyOnly " + + "container as EMPTY"); + + assertAllClassifiedBySCM(scmCm, setup.emptyMissingIds, + ContainerHealthState.MISSING, + "MISSING (CLOSING + 0 replicas → SCM RM emits getStat(MISSING)++)"); + assertAllEmptyContent(scmCm, setup.emptyMissingIds); + assertAllClassifiedByRecon(records.emptyMissing, setup.emptyMissingIds, + "EMPTY_MISSING"); + assertEquals(setup.emptyMissingIds.size(), + countMatchingReconRecords(records.emptyMissing, setup.emptyMissingIds), + "EMPTY_MISSING: CLOSING containers that are both MISSING (no " + + "replicas, getStat(MISSING)++ in SCM) and EMPTY " + + "(numberOfKeys=0) must be stored as EMPTY_MISSING in Recon"); + assertEquals((long) (setup.missingIds.size() + setup.emptyMissingIds.size()), + countMatchingHealthState(scmCm, setup.missingIds, ContainerHealthState.MISSING) + + countMatchingHealthState(scmCm, setup.emptyMissingIds, + ContainerHealthState.MISSING), + "SCM getStat(MISSING) must equal the combined MISSING + " + + "EMPTY_MISSING count"); + + assertEquals(0L, scmReport.getStat(ContainerHealthState.MIS_REPLICATED), + "MIS_REPLICATED SCM RM count should be 0 when not induced"); + 
assertEquals(0, records.misRep.size(), + "MIS_REPLICATED Recon count should be 0 when not induced"); + } + + private void assertStateMatch( + ContainerManager scmCm, + List ids, + List records, + ContainerHealthState expected, + String label, + String message) throws Exception { + assertAllClassifiedBySCM(scmCm, ids, expected, label); + assertAllClassifiedByRecon(records, ids, label); + assertEquals(countMatchingHealthState(scmCm, ids, expected), + countMatchingReconRecords(records, ids), message); + } + + /** + * Asserts that every container ID in {@code ids} has the expected + * {@link ContainerHealthState} set on SCM's {@link ContainerInfo} object + * after SCM's {@code ReplicationManager.processAll()} has run. + */ + private void assertAllClassifiedBySCM( + ContainerManager scmCm, + List ids, + ContainerHealthState expected, + String label) throws Exception { + for (long id : ids) { + ContainerInfo container = scmCm.getContainer(ContainerID.valueOf(id)); + // Recompute SCM health via the full RM handler chain in read-only mode + // right before asserting, instead of relying on a previously cached + // healthState value on ContainerInfo. + cluster.getStorageContainerManager().getReplicationManager() + .checkContainerStatus(container, new ReplicationManagerReport(MAX_RESULT)); + ContainerHealthState actual = container.getHealthState(); + assertEquals(expected, actual, + String.format( + "SCM must classify container %d as %s but got %s", + id, label, actual)); + } + } + + /** + * Asserts that every container ID in {@code ids} is present in Recon's + * UNHEALTHY_CONTAINERS records for the given health state label. 
+ */ + private void assertAllClassifiedByRecon( + List records, + List ids, + String label) { + for (long id : ids) { + assertTrue(containsContainerId(records, id), + String.format( + "Recon UNHEALTHY_CONTAINERS must contain container %d in state %s", + id, label)); + } + } + + /** + * Asserts that NONE of the container IDs in {@code ids} are present in the + * given UNHEALTHY_CONTAINERS records list. + * + *

Used to verify that containers classified as {@code EMPTY} by SCM's RM + * (e.g., CLOSED + 0 replicas + 0 keys) are NOT stored in Recon's + * {@code UNHEALTHY_CONTAINERS} table under any health state. + */ + private void assertNoneInRecon( + List records, + List ids, + String message) { + for (long id : ids) { + assertFalse(containsContainerId(records, id), + String.format("Container %d should not be in UNHEALTHY_CONTAINERS: %s", + id, message)); + } + } + + /** + * Asserts that every container ID in {@code ids} has {@code numberOfKeys == 0} + * in SCM's {@link ContainerInfo}, explicitly verifying the "EMPTY" content property. + * + *

Used alongside {@link #assertAllClassifiedBySCM} for EMPTY_MISSING containers + * to confirm that both conditions for EMPTY_MISSING are present: the container is + * MISSING (health=MISSING in SCM RM) AND EMPTY (numberOfKeys=0). + */ + private void assertAllEmptyContent( + ContainerManager scmCm, + List ids) throws Exception { + for (long id : ids) { + long numKeys = scmCm.getContainer(ContainerID.valueOf(id)).getNumberOfKeys(); + assertEquals(0L, numKeys, + String.format( + "Container %d must have numberOfKeys=0 to qualify as EMPTY_MISSING " + + "(container is EMPTY in content and MISSING in replication)", id)); + } + } + + // =========================================================================== + // Validation and logging helpers + // =========================================================================== + + /** + * Validates that per lifecycle-state counts match between SCM and Recon, + * logs the comparison, and returns a map of states where they differ. + */ + private Map validateAndLogStateSummary( + ContainerManager scmCm, + ReconContainerManager reconCm) { + return Arrays.stream(HddsProtos.LifeCycleState.values()) + .filter(state -> { + int scmCount = scmCm.getContainers(state).size(); + int reconCount = reconCm.getContainers(state).size(); + LOG.info("{}: SCM={}, Recon={}", + String.format("%-12s", state.name()), scmCount, reconCount); + return scmCount != reconCount; + }) + .collect(Collectors.toMap( + state -> state, + state -> scmCm.getContainers(state).size() + - reconCm.getContainers(state).size())); + } + + private void logStateSummaryHeader() { + LOG.info(""); + LOG.info("Container State Summary (SCM vs Recon)"); + LOG.info("======================================="); + } + + private void logHealthSummary( + ReplicationManagerReport scmReport, + List reconUnderRep, + List reconOverRep, + List reconMissing, + List reconEmptyMissing, + List reconMisRep) { + LOG.info(""); + LOG.info("Container Health Summary (SCM RM Report vs Recon 
UNHEALTHY_CONTAINERS)"); + LOG.info("========================================================================"); + LOG.info("UNDER_REPLICATED: SCM={}, Recon={}", + scmReport.getStat(ContainerHealthState.UNDER_REPLICATED), + reconUnderRep.size()); + LOG.info("MIS_REPLICATED: SCM={}, Recon={} [not induced]", + scmReport.getStat(ContainerHealthState.MIS_REPLICATED), + reconMisRep.size()); + LOG.info("OVER_REPLICATED: SCM={}, Recon={}", + scmReport.getStat(ContainerHealthState.OVER_REPLICATED), + reconOverRep.size()); + LOG.info("MISSING: SCM={}, Recon MISSING={} + EMPTY_MISSING={}", + scmReport.getStat(ContainerHealthState.MISSING), + reconMissing.size(), reconEmptyMissing.size()); + } + + private void logContainerSummaryReport( + ContainerManager scmCm, + ReconContainerManager reconCm, + ReplicationManagerReport scmReport, + ReconHealthRecords records) { + LOG.info(""); + LOG.info("Container Summary Report"); + LOG.info("=========================================================="); + LOG.info(""); + LOG.info("Container State Summary (SCM vs Recon — counts must match)"); + LOG.info("======================="); + for (HddsProtos.LifeCycleState state : HddsProtos.LifeCycleState.values()) { + LOG.info("{}: SCM={}, Recon={}", String.format("%-12s", state.name()), + scmCm.getContainers(state).size(), reconCm.getContainers(state).size()); + } + + LOG.info(""); + LOG.info("Container Health Summary (SCM RM Report vs Recon UNHEALTHY_CONTAINERS)"); + LOG.info("========================"); + LOG.info("HEALTHY: SCM={} (not stored in UNHEALTHY_CONTAINERS)", + scmReport.getStat(ContainerHealthState.HEALTHY)); + LOG.info("UNDER_REPLICATED: SCM={}, Recon={}", + scmReport.getStat(ContainerHealthState.UNDER_REPLICATED), + records.underRep.size()); + LOG.info("MIS_REPLICATED: SCM={}, Recon={}" + + " [not induced — rack-aware topology required]", + scmReport.getStat(ContainerHealthState.MIS_REPLICATED), + records.misRep.size()); + LOG.info("OVER_REPLICATED: SCM={}, Recon={}", + 
scmReport.getStat(ContainerHealthState.OVER_REPLICATED), + records.overRep.size()); + LOG.info("MISSING: SCM={}, Recon MISSING={}," + + " Recon EMPTY_MISSING={} [SCM MISSING includes both MISSING + EMPTY_MISSING" + + " containers; Recon differentiates via numberOfKeys]", + scmReport.getStat(ContainerHealthState.MISSING), + records.missing.size(), records.emptyMissing.size()); + LOG.info("UNHEALTHY: SCM={}", + scmReport.getStat(ContainerHealthState.UNHEALTHY)); + LOG.info("EMPTY: SCM={}" + + " [CLOSED+0-key+0-replica containers; EmptyContainerHandler fires first;" + + " NOT stored in Recon UNHEALTHY_CONTAINERS — contrast to EMPTY_MISSING]", + scmReport.getStat(ContainerHealthState.EMPTY)); + LOG.info("OPEN_UNHEALTHY: SCM={}", + scmReport.getStat(ContainerHealthState.OPEN_UNHEALTHY)); + LOG.info("QUASI_CLOSED_STUCK: SCM={}", + scmReport.getStat(ContainerHealthState.QUASI_CLOSED_STUCK)); + LOG.info("OPEN_WITHOUT_PIPELINE: SCM={}", + scmReport.getStat(ContainerHealthState.OPEN_WITHOUT_PIPELINE)); + LOG.info("UNHEALTHY_UNDER_REPLICATED: SCM={}", + scmReport.getStat(ContainerHealthState.UNHEALTHY_UNDER_REPLICATED)); + LOG.info("UNHEALTHY_OVER_REPLICATED: SCM={}", + scmReport.getStat(ContainerHealthState.UNHEALTHY_OVER_REPLICATED)); + LOG.info("MISSING_UNDER_REPLICATED: SCM={}", + scmReport.getStat(ContainerHealthState.MISSING_UNDER_REPLICATED)); + LOG.info("QUASI_CLOSED_STUCK_UNDER_REPLICATED: SCM={}", + scmReport.getStat(ContainerHealthState.QUASI_CLOSED_STUCK_UNDER_REPLICATED)); + LOG.info("QUASI_CLOSED_STUCK_OVER_REPLICATED: SCM={}", + scmReport.getStat(ContainerHealthState.QUASI_CLOSED_STUCK_OVER_REPLICATED)); + LOG.info("QUASI_CLOSED_STUCK_MISSING: SCM={}", + scmReport.getStat(ContainerHealthState.QUASI_CLOSED_STUCK_MISSING)); + LOG.info("NEGATIVE_SIZE: Recon={}" + + " (Recon-only; no SCM RM equivalent)", + records.negSize.size()); + LOG.info("REPLICA_MISMATCH: Recon={}" + + " (Recon-only; no SCM RM equivalent)", + records.replicaMismatch.size()); + } + + // 
=========================================================================== + // Utility helpers + // =========================================================================== + + private ReconHealthRecords loadReconHealthRecords(ReconContainerManager reconCm) { + ContainerHealthSchemaManager healthMgr = reconCm.getContainerSchemaManager(); + ReconHealthRecords records = new ReconHealthRecords(); + records.underRep = queryUnhealthy(healthMgr, + UnHealthyContainerStates.UNDER_REPLICATED); + records.overRep = queryUnhealthy(healthMgr, + UnHealthyContainerStates.OVER_REPLICATED); + records.missing = queryUnhealthy(healthMgr, + UnHealthyContainerStates.MISSING); + records.emptyMissing = queryUnhealthy(healthMgr, + UnHealthyContainerStates.EMPTY_MISSING); + records.misRep = queryUnhealthy(healthMgr, + UnHealthyContainerStates.MIS_REPLICATED); + records.negSize = queryUnhealthy(healthMgr, + UnHealthyContainerStates.NEGATIVE_SIZE); + records.replicaMismatch = queryUnhealthy(healthMgr, + UnHealthyContainerStates.REPLICA_MISMATCH); + return records; + } + + /** + * Transitions a container to CLOSED state in both SCM and Recon by applying + * FINALIZE (OPEN → CLOSING) then CLOSE (CLOSING → CLOSED) in both systems. + * This is a metadata-only operation; no CLOSE command is dispatched to the + * actual datanodes (those are dispatched by the ReplicationManager and + * CloseContainerEventHandler, both idle during tests due to the 5m interval). 
+ */ + private void closeInBoth(ContainerManager scmCm, ReconContainerManager reconCm, + ContainerID cid) throws Exception { + scmCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + scmCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.CLOSE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.FINALIZE); + reconCm.updateContainerState(cid, HddsProtos.LifeCycleEvent.CLOSE); + } + + private List queryUnhealthy( + ContainerHealthSchemaManager healthMgr, + UnHealthyContainerStates state) { + return healthMgr.getUnhealthyContainers(state, 0L, 0L, MAX_RESULT); + } + + private long countMatchingHealthState( + ContainerManager scmCm, + List ids, + ContainerHealthState expected) throws Exception { + long count = 0; + for (long id : ids) { + if (scmCm.getContainer(ContainerID.valueOf(id)).getHealthState() == expected) { + count++; + } + } + return count; + } + + private long countMatchingReconRecords( + List records, + List ids) { + return ids.stream() + .filter(id -> containsContainerId(records, id)) + .count(); + } + + private boolean containsContainerId( + List records, long containerId) { + return records.stream().anyMatch(r -> r.getContainerId() == containerId); + } + + private void syncAndWaitForReconContainers( + ReconStorageContainerManagerFacade reconScm, + ReconContainerManager reconCm, + List containerIDs) throws Exception { + reconScm.syncWithSCMContainerInfo(); + drainScmAndReconEventQueues(); + backfillMissingContainersFromScm(reconCm, containerIDs); + LambdaTestUtils.await(REPLICA_SYNC_TIMEOUT_MS, POLL_INTERVAL_MS, + () -> containerIDs.stream().allMatch(reconCm::containerExist)); + } + + private void backfillMissingContainersFromScm( + ReconContainerManager reconCm, + List containerIDs) throws Exception { + StorageContainerManager scm = cluster.getStorageContainerManager(); + ContainerManager scmCm = scm.getContainerManager(); + for (ContainerID containerID : containerIDs) { + if (reconCm.containerExist(containerID)) { + 
continue; + } + + ContainerInfo scmInfo = scmCm.getContainer(containerID); + ContainerInfo reconInfo = + ContainerInfo.fromProtobuf(scmInfo.getProtobuf()); + Pipeline pipeline = null; + if (scmInfo.getPipelineID() != null) { + try { + pipeline = scm.getPipelineManager() + .getPipeline(scmInfo.getPipelineID()); + } catch (PipelineNotFoundException ignored) { + pipeline = null; + } + } + reconCm.addNewContainer(new ContainerWithPipeline(reconInfo, pipeline)); + } + } + + private void createContainerOnPipeline(ContainerInfo containerInfo) + throws Exception { + Pipeline pipeline = cluster.getStorageContainerManager() + .getPipelineManager() + .getPipeline(containerInfo.getPipelineID()); + try (XceiverClientManager clientManager = new XceiverClientManager(conf)) { + XceiverClientSpi client = clientManager.acquireClient(pipeline); + try { + ContainerProtocolCalls.createContainer( + client, containerInfo.getContainerID(), null); + } finally { + clientManager.releaseClient(client, false); + } + } + } + + private void deleteContainerReplica( + MiniOzoneCluster ozoneCluster, DatanodeDetails dn, long containerId) + throws Exception { + OzoneContainer ozoneContainer = + ozoneCluster.getHddsDatanode(dn).getDatanodeStateMachine().getContainer(); + Container containerData = + ozoneContainer.getContainerSet().getContainer(containerId); + if (containerData != null) { + ozoneContainer.getDispatcher().getHandler(KeyValueContainer) + .deleteContainer(containerData, true); + } + ozoneCluster.getHddsDatanode(dn).getDatanodeStateMachine().triggerHeartbeat(); + } + + private void drainScmAndReconEventQueues() { + ((EventQueue) cluster.getStorageContainerManager().getEventQueue()) + .processAll(5000L); + getReconScm().getEventQueue().processAll(5000L); + } + + @SafeVarargs + private final List combineContainerIds(List... 
groups) { + List combined = new ArrayList<>(); + for (List group : groups) { + combined.addAll(group); + } + return combined; + } + + private ReconStorageContainerManagerFacade getReconScm() { + return (ReconStorageContainerManagerFacade) + recon.getReconServer().getReconStorageContainerManager(); + } + + private static final class HealthSummarySetup { + private List underReplicatedIds; + private List overReplicatedIds; + private List missingIds; + private List emptyMissingIds; + private List emptyOnlyIds; + } + + private static final class ReconHealthRecords { + private List underRep; + private List overRep; + private List missing; + private List emptyMissing; + private List misRep; + private List negSize; + private List replicaMismatch; + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java index b4da42d8f03a..63a1304fb0e9 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java @@ -132,9 +132,30 @@ public final class ReconServerConfigKeys { public static final String OZONE_RECON_METRICS_HTTP_CONNECTION_REQUEST_TIMEOUT_DEFAULT = "60s"; + /** + * Total container count drift threshold above which the periodic incremental + * sync escalates to a full SCM DB snapshot. + * + *

When {@code |(SCM_total_containers - SCM_open_containers) - + * (Recon_total_containers - Recon_open_containers)|} exceeds this value the + * targeted 4-pass sync becomes expensive (many batched RPC rounds) and a + * full checkpoint replacement is cheaper and more reliable. The comparison + * intentionally excludes OPEN containers because missing OPEN containers are + * short-lived and can be repaired incrementally without replacing the full + * SCM DB. For drift at or below this value the incremental sync corrects the + * gap without replacing the entire database. + * + *

Note: a full snapshot is also scheduled unconditionally every 24h + * (configurable via {@code ozone.recon.scm.snapshot.task.interval.delay}) + * as a structural safety net, independent of this threshold. + * + *

Default: 10,000. In large clusters (millions of containers) operators + * may raise this further since the targeted sync handles per-state + * corrections efficiently even at higher drift levels. + */ public static final String OZONE_RECON_SCM_CONTAINER_THRESHOLD = "ozone.recon.scm.container.threshold"; - public static final int OZONE_RECON_SCM_CONTAINER_THRESHOLD_DEFAULT = 100; + public static final int OZONE_RECON_SCM_CONTAINER_THRESHOLD_DEFAULT = 10_000; public static final String OZONE_RECON_SCM_SNAPSHOT_ENABLED = "ozone.recon.scm.snapshot.enabled"; @@ -196,6 +217,36 @@ public final class ReconServerConfigKeys { public static final String OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY_DEFAULT = "1m"; + /** + * How often the incremental (targeted) SCM container sync runs. + * + *

Each cycle calls {@code decideSyncAction()} — two lightweight count + * RPCs to SCM — and then either runs the 4-pass incremental sync or takes + * no action. A full snapshot is still gated by + * {@code ozone.recon.scm.snapshot.task.interval.delay} (default 24h). + * + *

Default: 1h. Set to a shorter value in environments where container + * state discrepancies need to be detected and corrected faster. + */ + public static final String OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INTERVAL_DELAY = + "ozone.recon.scm.container.sync.task.interval.delay"; + + public static final String OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INTERVAL_DEFAULT + = "1h"; + + /** + * Initial delay before the first incremental SCM container sync run. + * + *

Default: 2m (slightly later than the snapshot initial delay of 1m, + * so the snapshot has time to initialize the SCM DB before the first + * incremental sync attempts to read it). + */ + public static final String OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INITIAL_DELAY = + "ozone.recon.scm.container.sync.task.initial.delay"; + + public static final String + OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INITIAL_DELAY_DEFAULT = "2m"; + public static final String OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY = "ozone.recon.scmclient.rpc.timeout"; @@ -253,6 +304,47 @@ public final class ReconServerConfigKeys { "ozone.recon.scm.container.id.batch.size"; public static final long OZONE_RECON_SCM_CONTAINER_ID_BATCH_SIZE_DEFAULT = 1_000_000; + /** + * Maximum number of CLOSED/QUASI_CLOSED containers to check against SCM per + * Pass 4 (DELETED retirement) sync cycle. Limiting the batch size prevents + * excessive SCM RPC load during a single sync run; containers not checked in + * one cycle are deferred to the next. + * + *

Default: 500 containers per sync cycle. + */ + public static final String OZONE_RECON_SCM_DELETED_CONTAINER_CHECK_BATCH_SIZE = + "ozone.recon.scm.deleted.container.check.batch.size"; + public static final int OZONE_RECON_SCM_DELETED_CONTAINER_CHECK_BATCH_SIZE_DEFAULT = 500; + + /** + * Per-state drift threshold used by the tiered sync decision when the total + * container count in SCM and Recon is equal. + * + *

Equal totals can still hide lifecycle state drift: a container that + * advanced from OPEN → QUASI_CLOSED → CLOSED in SCM is counted in both SCM + * and Recon's total, but Recon may still record it in the old state. + * The following per-state comparisons are evaluated: + * + *

    + *
+ *   <li>OPEN: catches containers stuck OPEN in Recon after SCM has
+ *       already moved them to CLOSING, QUASI_CLOSED, or CLOSED.</li>
+ *   <li>QUASI_CLOSED: catches containers stuck QUASI_CLOSED in Recon
+ *       after SCM has already moved them to CLOSED or beyond. This case is
+ *       invisible to the OPEN check alone.</li>
+ * + *

If the drift in any of the checked states exceeds this + * threshold a targeted sync is triggered. A full snapshot is deliberately + * NOT triggered for per-state drift because the targeted sync's per-state + * passes already correct these conditions efficiently without replacing the + * entire database. + * + *

Default: 5. + */ + public static final String OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD = + "ozone.recon.scm.per.state.drift.threshold"; + public static final int OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD_DEFAULT = 5; + /** * Private constructor for utility class. */ diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java index 4f91b01db87a..d5740acdda40 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/TriggerDBSyncEndpoint.java @@ -19,28 +19,48 @@ import javax.inject.Inject; import javax.ws.rs.GET; +import javax.ws.rs.POST; import javax.ws.rs.Path; import javax.ws.rs.Produces; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; +import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade; import org.apache.hadoop.ozone.recon.spi.OzoneManagerServiceProvider; /** - * Endpoint to trigger the OM DB sync between Recon and OM. + * Admin-only endpoint to manually trigger DB sync operations between Recon + * and its upstream sources (OM and SCM). + * + *

Available endpoints: + *

    + *
+ *   <li>{@code GET /api/v1/triggerdbsync/om} — triggers full OM DB sync</li>
+ *   <li>{@code POST /api/v1/triggerdbsync/scm} — triggers targeted SCM
+ *       container sync (four-pass incremental: add missing CLOSED/OPEN/
+ *       QUASI_CLOSED containers, correct stale OPEN state, retire DELETED
+ *       containers)</li>
*/ @Path("/triggerdbsync") @Produces(MediaType.APPLICATION_JSON) @AdminOnly public class TriggerDBSyncEndpoint { - private OzoneManagerServiceProvider ozoneManagerServiceProvider; + private final OzoneManagerServiceProvider ozoneManagerServiceProvider; + private final ReconStorageContainerManagerFacade reconScm; @Inject public TriggerDBSyncEndpoint( - OzoneManagerServiceProvider ozoneManagerServiceProvider) { + OzoneManagerServiceProvider ozoneManagerServiceProvider, + ReconStorageContainerManagerFacade reconScm) { this.ozoneManagerServiceProvider = ozoneManagerServiceProvider; + this.reconScm = reconScm; } + /** + * Triggers an immediate full OM DB sync between Recon and the Ozone Manager. + * + * @return {@code true} if the sync was initiated successfully. + */ @GET @Path("om") public Response triggerOMDBSync() { @@ -48,4 +68,37 @@ public Response triggerOMDBSync() { ozoneManagerServiceProvider.triggerSyncDataFromOMImmediately(); return Response.ok(isSuccess).build(); } + + /** + * Triggers an immediate targeted SCM container sync. + * + *

Runs the four-pass incremental sync unconditionally (bypassing the + * periodic drift-based decision): + *

    + *
+ *   <li>Pass 1 (CLOSED): adds missing CLOSED containers and corrects
+ *       containers stuck as OPEN or CLOSING in Recon.</li>
+ *   <li>Pass 2 (OPEN): adds OPEN containers that Recon never received
+ *       (e.g., created while Recon was down).</li>
+ *   <li>Pass 3 (QUASI_CLOSED): adds QUASI_CLOSED containers absent from
+ *       Recon.</li>
+ *   <li>Pass 4 (DELETED retirement): transitions containers that SCM has
+ *       marked DELETED from their current Recon state (CLOSED/QUASI_CLOSED)
+ *       forward to DELETED in Recon's metadata store.</li>
+ * + *

This endpoint is useful for immediately resolving known discrepancies + * without waiting for the next periodic sync cycle (default: every 1h). + * For large-scale drift (hundreds of containers), consider triggering a + * full SCM DB snapshot sync instead via the Recon admin REST API. + * + * @return {@code true} if all four passes completed without fatal errors, + * {@code false} if one or more passes encountered errors (partial + * sync may have occurred; check Recon logs for details). + */ + @POST + @Path("scm") + public Response triggerSCMContainerSync() { + boolean isSuccess = reconScm.syncWithSCMContainerInfo(); + return Response.ok(isSuccess).build(); + } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java index ac1e91350cc6..c32db91de6ee 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java @@ -64,7 +64,7 @@ public class ContainerHealthSchemaManager { * twice the limit. 1,000 IDs stays well under ~30 KB, providing a safe * 2× margin.

*/ - static final int MAX_DELETE_CHUNK_SIZE = 1_000; + static final int MAX_IN_CLAUSE_CHUNK_SIZE = 1_000; private final ContainerSchemaDefinition containerSchemaDefinition; @@ -153,7 +153,8 @@ private UnhealthyContainersRecord toJooqRecord(DSLContext txContext, * limit. A single {@code IN} predicate with more than ~2,000 values (when * combined with the 7-state container_state filter) overflows this limit * and causes {@code ERROR XBCM4}. This method automatically partitions - * {@code containerIds} into chunks of at most {@value #MAX_DELETE_CHUNK_SIZE} + * {@code containerIds} into chunks of at most + * {@value #MAX_IN_CLAUSE_CHUNK_SIZE} * IDs so callers never need to worry about the limit, regardless of how * many containers a scan cycle processes. * @@ -198,8 +199,8 @@ private int deleteScmStatesForContainers(DSLContext dslContext, List containerIds) { int totalDeleted = 0; - for (int from = 0; from < containerIds.size(); from += MAX_DELETE_CHUNK_SIZE) { - int to = Math.min(from + MAX_DELETE_CHUNK_SIZE, containerIds.size()); + for (int from = 0; from < containerIds.size(); from += MAX_IN_CLAUSE_CHUNK_SIZE) { + int to = Math.min(from + MAX_IN_CLAUSE_CHUNK_SIZE, containerIds.size()); List chunk = containerIds.subList(from, to); int deleted = dslContext.deleteFrom(UNHEALTHY_CONTAINERS) @@ -221,6 +222,12 @@ private int deleteScmStatesForContainers(DSLContext dslContext, /** * Returns previous in-state-since timestamps for tracked unhealthy states. * The key is a stable containerId + state tuple. + * + *

This method also chunks the container-id predicate internally to stay + * within Derby's statement compilation limits. Large scan cycles in Recon can + * easily touch tens of thousands of containers, and expanding all IDs into a + * single {@code IN (...)} predicate causes Derby to generate bytecode that + * exceeds the JVM constant-pool / method-size limits.

*/ public Map getExistingInStateSinceByContainerIds( List containerIds) { @@ -231,24 +238,29 @@ public Map getExistingInStateSinceByContainerIds( DSLContext dslContext = containerSchemaDefinition.getDSLContext(); Map existing = new HashMap<>(); try { - dslContext.select( - UNHEALTHY_CONTAINERS.CONTAINER_ID, - UNHEALTHY_CONTAINERS.CONTAINER_STATE, - UNHEALTHY_CONTAINERS.IN_STATE_SINCE) - .from(UNHEALTHY_CONTAINERS) - .where(UNHEALTHY_CONTAINERS.CONTAINER_ID.in(containerIds)) - .and(UNHEALTHY_CONTAINERS.CONTAINER_STATE.in( - UnHealthyContainerStates.MISSING.toString(), - UnHealthyContainerStates.EMPTY_MISSING.toString(), - UnHealthyContainerStates.UNDER_REPLICATED.toString(), - UnHealthyContainerStates.OVER_REPLICATED.toString(), - UnHealthyContainerStates.MIS_REPLICATED.toString(), - UnHealthyContainerStates.NEGATIVE_SIZE.toString(), - UnHealthyContainerStates.REPLICA_MISMATCH.toString())) - .forEach(record -> existing.put( - new ContainerStateKey(record.get(UNHEALTHY_CONTAINERS.CONTAINER_ID), - record.get(UNHEALTHY_CONTAINERS.CONTAINER_STATE)), - record.get(UNHEALTHY_CONTAINERS.IN_STATE_SINCE))); + for (int from = 0; from < containerIds.size(); from += MAX_IN_CLAUSE_CHUNK_SIZE) { + int to = Math.min(from + MAX_IN_CLAUSE_CHUNK_SIZE, containerIds.size()); + List chunk = containerIds.subList(from, to); + + dslContext.select( + UNHEALTHY_CONTAINERS.CONTAINER_ID, + UNHEALTHY_CONTAINERS.CONTAINER_STATE, + UNHEALTHY_CONTAINERS.IN_STATE_SINCE) + .from(UNHEALTHY_CONTAINERS) + .where(UNHEALTHY_CONTAINERS.CONTAINER_ID.in(chunk)) + .and(UNHEALTHY_CONTAINERS.CONTAINER_STATE.in( + UnHealthyContainerStates.MISSING.toString(), + UnHealthyContainerStates.EMPTY_MISSING.toString(), + UnHealthyContainerStates.UNDER_REPLICATED.toString(), + UnHealthyContainerStates.OVER_REPLICATED.toString(), + UnHealthyContainerStates.MIS_REPLICATED.toString(), + UnHealthyContainerStates.NEGATIVE_SIZE.toString(), + UnHealthyContainerStates.REPLICA_MISMATCH.toString())) + .forEach(record -> 
existing.put( + new ContainerStateKey(record.get(UNHEALTHY_CONTAINERS.CONTAINER_ID), + record.get(UNHEALTHY_CONTAINERS.CONTAINER_STATE)), + record.get(UNHEALTHY_CONTAINERS.IN_STATE_SINCE))); + } } catch (Exception e) { LOG.warn("Failed to load existing inStateSince records. Falling back to current scan time.", e); } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java index 586aad5fd68f..9a79418ac3b2 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java @@ -18,7 +18,11 @@ package org.apache.hadoop.ozone.recon.scm; import static java.util.Comparator.comparingLong; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.CLEANUP; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.CLOSE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.DELETE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.FINALIZE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.QUASI_CLOSE; import com.google.common.annotations.VisibleForTesting; import java.io.IOException; @@ -45,6 +49,7 @@ import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaPendingOps; import org.apache.hadoop.hdds.scm.ha.SCMHAManager; import org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import org.apache.hadoop.hdds.utils.db.DBStore; @@ -114,8 +119,9 @@ public void checkAndAddNewContainer(ContainerID containerID, datanodeDetails.getHostName()); ContainerWithPipeline containerWithPipeline = 
scmClient.getContainerWithPipeline(containerID.getId()); + Pipeline pipeline = containerWithPipeline.getPipeline(); LOG.debug("Verified new container from SCM {}, {} ", - containerID, containerWithPipeline.getPipeline().getId()); + containerID, pipeline != null ? pipeline.getId() : ""); // no need call "containerExist" to check, because // 1 containerExist and addNewContainer can not be atomic // 2 addNewContainer will double check the existence @@ -179,33 +185,157 @@ public void checkAndAddNewContainerBatch( } /** - * Check if container state is not open. In SCM, container state - * changes to CLOSING first, and then the close command is pushed down - * to Datanodes. Recon 'learns' this from DN, and hence replica state - * will move container state to 'CLOSING'. + * Transitions a container from OPEN to CLOSING, keeping the per-pipeline + * open-container count in {@link #pipelineToOpenContainer} accurate. * - * @param containerID containerID to check - * @param state state to be compared + *

Must be called whenever an OPEN container is moved to CLOSING so that + * the pipeline's open-container count stays consistent. Both the DN-report + * driven path ({@link #checkContainerStateAndUpdate}) and the periodic sync + * passes ({@code processSyncedClosedContainer}, {@code syncQuasiClosedContainers}) + * use this method to avoid divergence in the count exposed to the Recon Node API. + * + *

If the container was recorded without a pipeline (null pipeline at + * {@code addNewContainer} time) the count decrement is safely skipped. + * + * @param containerID container to advance from OPEN to CLOSING + * @param containerInfo already-fetched {@code ContainerInfo} for the container + * (avoids a redundant lookup inside this method) + * @throws IOException if the state update fails + * @throws InvalidStateTransitionException if the container is not in OPEN state */ - - private void checkContainerStateAndUpdate(ContainerID containerID, - ContainerReplicaProto.State state) - throws IOException, InvalidStateTransitionException { - ContainerInfo containerInfo = getContainer(containerID); - if (containerInfo.getState().equals(HddsProtos.LifeCycleState.OPEN) - && !state.equals(ContainerReplicaProto.State.OPEN) - && isHealthy(state)) { - LOG.info("Container {} has state OPEN, but given state is {}.", - containerID, state); - final PipelineID pipelineID = containerInfo.getPipelineID(); - // subtract open container count from the map + void transitionOpenToClosing(ContainerID containerID, ContainerInfo containerInfo) + throws IOException, InvalidStateTransitionException { + PipelineID pipelineID = containerInfo.getPipelineID(); + if (pipelineID != null) { int curCnt = pipelineToOpenContainer.getOrDefault(pipelineID, 0); if (curCnt == 1) { pipelineToOpenContainer.remove(pipelineID); } else if (curCnt > 0) { pipelineToOpenContainer.put(pipelineID, curCnt - 1); } - updateContainerState(containerID, FINALIZE); + } + updateContainerState(containerID, FINALIZE); // OPEN → CLOSING + } + + /** + * Check if container state needs to advance based on a DN replica report and + * SCM's authoritative lifecycle state. + * + *

Two scenarios handled: + *

    + *
+ *   <li>OPEN in Recon + non-OPEN healthy replica → FINALIZE (OPEN→CLOSING),
+ *       then query SCM to advance further if possible.</li>
+ *   <li>CLOSING in Recon + any report → query SCM to advance to
+ *       QUASI_CLOSED or CLOSED if SCM has already moved there.</li>
+ *   <li>DELETED in Recon + live replica report → rehydrate the container from
+ *       SCM if SCM still records it in a live state such as QUASI_CLOSED or
+ *       CLOSED.</li>
+ * + *

Querying SCM for the authoritative state prevents containers from getting + * permanently stuck at CLOSING when the DN report that would normally + * trigger the next transition was missed (e.g., Recon downtime). + * + * @param containerID containerID to check + * @param replicaState replica state reported by DataNode + */ + private void checkContainerStateAndUpdate(ContainerID containerID, + ContainerReplicaProto.State replicaState) + throws IOException, InvalidStateTransitionException { + ContainerInfo containerInfo = getContainer(containerID); + HddsProtos.LifeCycleState reconState = containerInfo.getState(); + + if (reconState == HddsProtos.LifeCycleState.DELETED) { + recoverDeletedContainerFromScm(containerID, replicaState); + return; + } + + // Only act on transient pre-closed states where a DN report signals change + boolean isTransient = reconState == HddsProtos.LifeCycleState.OPEN + || reconState == HddsProtos.LifeCycleState.CLOSING; + if (!isTransient + || replicaState == ContainerReplicaProto.State.OPEN + || !isHealthy(replicaState)) { + return; + } + + if (reconState == HddsProtos.LifeCycleState.OPEN) { + LOG.info("Container {} is OPEN in Recon but DN reports replica state {}. " + + "Moving to CLOSING.", containerID, replicaState); + transitionOpenToClosing(containerID, containerInfo); // OPEN → CLOSING + counter update + // Fall through: now CLOSING — query SCM to advance further if possible + } + + // Container is now CLOSING in Recon. Query SCM for the authoritative + // state so we do not permanently stick at CLOSING when the next DN + // transition report was missed. + try { + ContainerWithPipeline scmContainer = + scmClient.getContainerWithPipeline(containerID.getId()); + HddsProtos.LifeCycleState scmState = + scmContainer.getContainerInfo().getState(); + + // Idempotent transitions are safe even if already past the target state. 
+ if (scmState == HddsProtos.LifeCycleState.QUASI_CLOSED) { + updateContainerState(containerID, QUASI_CLOSE); // CLOSING → QUASI_CLOSED + LOG.info("Container {} advanced to QUASI_CLOSED in Recon (SCM state: {}).", + containerID, scmState); + } else if (scmState == HddsProtos.LifeCycleState.CLOSED) { + updateContainerState(containerID, CLOSE); // CLOSING → CLOSED + LOG.info("Container {} advanced to CLOSED in Recon (SCM state: {}).", + containerID, scmState); + } else if (scmState == HddsProtos.LifeCycleState.DELETING + || scmState == HddsProtos.LifeCycleState.DELETED) { + // Unusual but possible: SCM already deleted this container. + // Drive through CLOSE first (idempotent), then DELETE, then CLEANUP. + updateContainerState(containerID, CLOSE); + updateContainerState(containerID, DELETE); + if (scmState == HddsProtos.LifeCycleState.DELETED) { + updateContainerState(containerID, CLEANUP); + } + LOG.info("Container {} advanced to {} in Recon (SCM state: {}).", + containerID, scmState, scmState); + } + // If scmState is still CLOSING: nothing more to do now; wait for next report. + } catch (IOException e) { + LOG.warn("Failed to fetch authoritative state for container {} from SCM. " + + "Container may remain in CLOSING until next periodic sync.", containerID, e); + } + } + + private void recoverDeletedContainerFromScm( + ContainerID containerID, ContainerReplicaProto.State replicaState) + throws IOException { + if (replicaState != ContainerReplicaProto.State.CLOSED + && replicaState != ContainerReplicaProto.State.QUASI_CLOSED) { + return; + } + + try { + ContainerWithPipeline scmContainer = + scmClient.getContainerWithPipeline(containerID.getId()); + HddsProtos.LifeCycleState scmState = + scmContainer.getContainerInfo().getState(); + if (scmState != HddsProtos.LifeCycleState.CLOSED + && scmState != HddsProtos.LifeCycleState.QUASI_CLOSED) { + LOG.info("Container {} is DELETED in Recon and DN reported {}, but SCM " + + "still reports {}. 
Skipping recovery.", containerID, replicaState, scmState); + return; + } + + // Reverse transitions are not supported by the lifecycle state machine, + // so rebuild the container record from SCM's authoritative metadata. + deleteContainer(containerID); + addNewContainer(scmContainer); + LOG.info("Recovered container {} from DELETED in Recon to {} based on " + + "DN report {} and SCM state {}.", containerID, scmState, replicaState, scmState); + } catch (ContainerNotFoundException e) { + LOG.warn("Container {} disappeared from Recon while recovering DELETED " + + "state; retry on next report.", containerID, e); + } catch (IOException e) { + LOG.warn("Failed to recover container {} from DELETED state using SCM " + + "metadata.", containerID, e); + throw e; } } @@ -218,7 +348,13 @@ private boolean isHealthy(ContainerReplicaProto.State replicaState) { /** * Adds a new container to Recon's container manager. * - * @param containerWithPipeline containerInfo with pipeline info + *

For OPEN containers a valid pipeline is expected. If the pipeline is + * {@code null} (e.g., returned by SCM when the pipeline has already been + * cleaned up for a QUASI_CLOSED container that arrived via the sync path), + * the container is still recorded in the state manager without pipeline + * tracking so that it is not permanently absent from Recon. + * + * @param containerWithPipeline containerInfo with pipeline info (pipeline may be null) * @throws IOException on Error. */ public void addNewContainer(ContainerWithPipeline containerWithPipeline) @@ -227,33 +363,41 @@ public void addNewContainer(ContainerWithPipeline containerWithPipeline) ContainerInfo containerInfo = containerWithPipeline.getContainerInfo(); try { if (containerInfo.getState().equals(HddsProtos.LifeCycleState.OPEN)) { - PipelineID pipelineID = containerWithPipeline.getPipeline().getId(); - // Check if the pipeline is present in Recon if not add it. - if (reconPipelineManager.addPipeline(containerWithPipeline.getPipeline())) { - LOG.info("Added new pipeline {} to Recon pipeline metadata from SCM.", pipelineID); + Pipeline pipeline = containerWithPipeline.getPipeline(); + if (pipeline != null) { + PipelineID pipelineID = pipeline.getId(); + // Check if the pipeline is present in Recon; add it if not. + if (reconPipelineManager.addPipeline(pipeline)) { + LOG.info("Added new pipeline {} to Recon pipeline metadata from SCM.", pipelineID); + } + getContainerStateManager().addContainer(containerInfo.getProtobuf()); + pipelineManager.addContainerToPipeline(pipelineID, containerInfo.containerID()); + // Update open container count on all datanodes on this pipeline. + pipelineToOpenContainer.put(pipelineID, + pipelineToOpenContainer.getOrDefault(pipelineID, 0) + 1); + LOG.info("Successfully added OPEN container {} with pipeline {} to Recon.", + containerInfo.containerID(), pipelineID); + } else { + // Pipeline not available (cleaned up in SCM). 
Record the container + // without pipeline tracking so it is not permanently absent from Recon. + getContainerStateManager().addContainer(containerInfo.getProtobuf()); + LOG.warn("Added OPEN container {} to Recon without pipeline " + + "(pipeline was null — likely cleaned up on SCM side). " + + "Pipeline tracking unavailable for this container.", + containerInfo.containerID()); } - - getContainerStateManager().addContainer(containerInfo.getProtobuf()); - pipelineManager.addContainerToPipeline( - containerWithPipeline.getPipeline().getId(), - containerInfo.containerID()); - // update open container count on all datanodes on this pipeline - pipelineToOpenContainer.put(pipelineID, - pipelineToOpenContainer.getOrDefault(pipelineID, 0) + 1); - LOG.info("Successfully added container {} to Recon.", - containerInfo.containerID()); - } else { getContainerStateManager().addContainer(containerInfo.getProtobuf()); - LOG.info("Successfully added no open container {} to Recon.", - containerInfo.containerID()); + LOG.info("Successfully added container {} in state {} to Recon.", + containerInfo.containerID(), containerInfo.getState()); } } catch (IOException ex) { - LOG.info("Exception while adding container {} .", - containerInfo.containerID(), ex); - pipelineManager.removeContainerFromPipeline( - containerInfo.getPipelineID(), - ContainerID.valueOf(containerInfo.getContainerID())); + LOG.info("Exception while adding container {}.", containerInfo.containerID(), ex); + PipelineID pipelineID = containerInfo.getPipelineID(); + if (pipelineID != null) { + pipelineManager.removeContainerFromPipeline( + pipelineID, ContainerID.valueOf(containerInfo.getContainerID())); + } throw ex; } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java index 278bac0011dc..49792ae99cdd 100644 --- 
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java @@ -24,13 +24,16 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_SCM_CLIENT_FAILOVER_MAX_RETRY; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_SCM_CLIENT_MAX_RETRY_TIMEOUT; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_SCM_CLIENT_RPC_TIME_OUT; -import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_DEFAULT; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_KEY; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INITIAL_DELAY; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INITIAL_DELAY_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INTERVAL_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INTERVAL_DELAY; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY_DEFAULT; import static 
org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DEFAULT; @@ -45,6 +48,7 @@ import java.net.InetSocketAddress; import java.time.Clock; import java.time.ZoneId; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -419,7 +423,9 @@ public void start() { "Recon ScmDatanodeProtocol RPC server", getDatanodeProtocolServer().getDatanodeRpcAddress())); } - ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1, + // Two threads: one for the periodic full-snapshot task and one for the + // incremental-sync/decideSyncAction task so they never block each other. + ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(2, new ThreadFactoryBuilder().setNameFormat(threadNamePrefix + "SyncSCMContainerInfo-%d") .build()); @@ -432,34 +438,98 @@ public void start() { } else { initializePipelinesFromScm(); } - LOG.debug("Started the SCM Container Info sync scheduler."); - long interval = ozoneConfiguration.getTimeDuration( + // ----------------------------------------------------------------------- + // Scheduler 1 (full snapshot): runs every 24h (default). + // Unconditionally replaces Recon's recon-scm.db with a fresh SCM + // checkpoint. This is the safety net that keeps the two databases + // structurally in sync even if incremental sync misses an edge case. 
+ // ----------------------------------------------------------------------- + long snapshotInterval = ozoneConfiguration.getTimeDuration( OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DELAY, OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); - long initialDelay = ozoneConfiguration.getTimeDuration( + long snapshotInitialDelay = ozoneConfiguration.getTimeDuration( OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY, OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY_DEFAULT, TimeUnit.MILLISECONDS); - // This periodic sync with SCM container cache is needed because during - // the window when recon will be down and any container being added - // newly and went missing, that container will not be reported as missing by - // recon till there is a difference of container count equivalent to - // threshold value defined in "ozone.recon.scm.container.threshold" - // between SCM container cache and recon container cache. scheduler.scheduleWithFixedDelay(() -> { try { - boolean isSuccess = syncWithSCMContainerInfo(); - if (!isSuccess) { - LOG.debug("SCM container info sync is already running."); + updateReconSCMDBWithNewSnapshot(); + } catch (IOException e) { + LOG.error("Failed to refresh Recon SCM DB snapshot.", e); + } + }, snapshotInitialDelay, snapshotInterval, TimeUnit.MILLISECONDS); + + // ----------------------------------------------------------------------- + // Scheduler 2 (incremental/targeted sync): runs every 1h (default). 
+ // + // Each cycle calls decideSyncAction() — two lightweight count RPCs to SCM + // — and then: + // + // |total drift| > threshold (default 10,000) + // → full snapshot: replace Recon's entire SCM DB from SCM checkpoint + // + // 0 < |total drift| <= threshold + // → targeted sync: 4-pass incremental repair + // + // total drift = 0 but per-state drift (OPEN or QUASI_CLOSED) > threshold (default 5) + // → targeted sync: corrects containers stuck in a stale lifecycle state + // + // no drift detected + // → no action this cycle + // + // Running this on a 1h cadence (vs the old 24h) means container state + // discrepancies are detected and corrected within an hour without waiting + // for the next full snapshot. + // ----------------------------------------------------------------------- + long syncInterval = ozoneConfiguration.getTimeDuration( + OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INTERVAL_DELAY, + OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + long syncInitialDelay = ozoneConfiguration.getTimeDuration( + OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INITIAL_DELAY, + OZONE_RECON_SCM_CONTAINER_SYNC_TASK_INITIAL_DELAY_DEFAULT, + TimeUnit.MILLISECONDS); + LOG.debug("Started the SCM Container Info sync scheduler (interval={}ms, initialDelay={}ms).", + syncInterval, syncInitialDelay); + scheduler.scheduleWithFixedDelay(() -> { + if (!isSyncDataFromSCMRunning.compareAndSet(false, true)) { + LOG.debug("SCM container info sync is already running; skipping this cycle."); + return; + } + try { + ReconStorageContainerSyncHelper.SyncAction action = + containerSyncHelper.decideSyncAction(); + switch (action) { + case FULL_SNAPSHOT: + LOG.info("Tiered sync decision: FULL_SNAPSHOT. " + + "Replacing Recon SCM DB with fresh SCM checkpoint."); + // updateReconSCMDBWithNewSnapshot guards itself with its own CAS; + // release our guard first so its internal guard can acquire. 
+ isSyncDataFromSCMRunning.set(false); + updateReconSCMDBWithNewSnapshot(); + return; // finally block below will not double-release + case TARGETED_SYNC: + LOG.info("Tiered sync decision: TARGETED_SYNC. Running 4-pass incremental sync."); + boolean success = containerSyncHelper.syncWithSCMContainerInfo(); + if (!success) { + LOG.warn("Targeted sync completed with one or more pass failures. " + + "Check logs above for details."); + } + break; + case NO_ACTION: + LOG.debug("Tiered sync decision: NO_ACTION. No drift detected this cycle."); + break; + default: + LOG.warn("Unknown SyncAction {}; skipping sync.", action); + break; } } catch (Throwable t) { - LOG.error("Unexpected exception while syncing data from SCM.", t); + LOG.error("Unexpected exception during periodic SCM container sync.", t); } finally { isSyncDataFromSCMRunning.compareAndSet(true, false); } }, - initialDelay, - interval, + syncInitialDelay, + syncInterval, TimeUnit.MILLISECONDS); getDatanodeProtocolServer().start(); reconSafeModeMgrTask.start(); @@ -550,77 +620,114 @@ private void initializeSCMDB() { public void updateReconSCMDBWithNewSnapshot() throws IOException { if (isSyncDataFromSCMRunning.compareAndSet(false, true)) { - DBCheckpoint dbSnapshot = scmServiceProvider.getSCMDBSnapshot(); - if (dbSnapshot != null && dbSnapshot.getCheckpointLocation() != null) { - LOG.info("Got new checkpoint from SCM : " + - dbSnapshot.getCheckpointLocation()); - try { - initializeNewRdbStore(dbSnapshot.getCheckpointLocation().toFile()); - } catch (IOException e) { - LOG.error("Unable to refresh Recon SCM DB Snapshot. ", e); + try { + DBCheckpoint dbSnapshot = scmServiceProvider.getSCMDBSnapshot(); + if (dbSnapshot != null && dbSnapshot.getCheckpointLocation() != null) { + LOG.info("Got new checkpoint from SCM : " + + dbSnapshot.getCheckpointLocation()); + try { + initializeNewRdbStore(dbSnapshot.getCheckpointLocation().toFile()); + } catch (IOException e) { + LOG.error("Unable to refresh Recon SCM DB Snapshot. 
", e); + } + } else { + LOG.error("Null snapshot location got from SCM."); } - } else { - LOG.error("Null snapshot location got from SCM."); + } finally { + isSyncDataFromSCMRunning.compareAndSet(true, false); } } else { LOG.warn("SCM DB sync is already running."); } } + /** + * Runs the four-pass targeted sync unconditionally (all states: CLOSED, + * OPEN, QUASI_CLOSED, and DELETED). This method is the direct + * entry point for the REST trigger endpoint + * {@code POST /api/v1/triggerdbsync/scm} and for any caller that explicitly + * wants an incremental sync rather than a drift-evaluated decision. + * + *

For the periodic scheduler the tiered + * {@link ReconStorageContainerSyncHelper#decideSyncAction()} path is used + * instead, which may escalate to a full snapshot or skip work entirely + * depending on observed drift. + */ public boolean syncWithSCMContainerInfo() { if (isSyncDataFromSCMRunning.compareAndSet(false, true)) { - return containerSyncHelper.syncWithSCMContainerInfo(); + try { + return containerSyncHelper.syncWithSCMContainerInfo(); + } finally { + isSyncDataFromSCMRunning.compareAndSet(true, false); + } } else { LOG.debug("SCM DB sync is already running."); return false; } } - private void deleteOldSCMDB() throws IOException { - if (dbStore != null) { - File oldDBLocation = dbStore.getDbLocation(); - if (oldDBLocation.exists()) { - LOG.info("Cleaning up old SCM snapshot db at {}.", - oldDBLocation.getAbsolutePath()); - FileUtils.deleteDirectory(oldDBLocation); - } + private void deleteSCMDB(File dbLocation) throws IOException { + if (dbLocation != null && dbLocation.exists()) { + LOG.info("Cleaning up old SCM snapshot db at {}.", + dbLocation.getAbsolutePath()); + FileUtils.deleteDirectory(dbLocation); } } private void initializeNewRdbStore(File dbFile) throws IOException { - try { - final DBStore newStore = DBStoreBuilder.newBuilder(ozoneConfiguration, ReconSCMDBDefinition.get(), dbFile) - .build(); - final Table nodeTable = ReconSCMDBDefinition.NODES.getTable(dbStore); - final Table newNodeTable = ReconSCMDBDefinition.NODES.getTable(newStore); - try (TableIterator> iterator = nodeTable.iterator()) { + final DBStore oldStore = dbStore; + final File oldDbLocation = oldStore != null ? 
oldStore.getDbLocation() : null; + final File newDb = new File(dbFile.getParent(), + ReconSCMDBDefinition.RECON_SCM_DB_NAME); + + Map existingNodes = new HashMap<>(); + if (oldStore != null) { + final Table nodeTable = + ReconSCMDBDefinition.NODES.getTable(oldStore); + try (TableIterator> iterator = + nodeTable.iterator()) { while (iterator.hasNext()) { - final KeyValue keyValue = iterator.next(); - newNodeTable.put(keyValue.getKey(), keyValue.getValue()); + final KeyValue keyValue = + iterator.next(); + existingNodes.put(keyValue.getKey(), keyValue.getValue()); } } - sequenceIdGen.reinitialize( - ReconSCMDBDefinition.SEQUENCE_ID.getTable(newStore)); - pipelineManager.reinitialize( - ReconSCMDBDefinition.PIPELINES.getTable(newStore)); - containerManager.reinitialize( - ReconSCMDBDefinition.CONTAINERS.getTable(newStore)); - nodeManager.reinitialize( - ReconSCMDBDefinition.NODES.getTable(newStore)); - IOUtils.close(LOG, dbStore); - deleteOldSCMDB(); - dbStore = newStore; - File newDb = new File(dbFile.getParent() + - OZONE_URI_DELIMITER + ReconSCMDBDefinition.RECON_SCM_DB_NAME); - boolean success = dbFile.renameTo(newDb); - if (success) { - LOG.info("SCM snapshot linked to Recon DB."); + } + + IOUtils.close(LOG, oldStore); + if (oldDbLocation != null && !oldDbLocation.equals(dbFile)) { + deleteSCMDB(oldDbLocation); + } + + if (!dbFile.equals(newDb)) { + if (newDb.exists()) { + deleteSCMDB(newDb); } - LOG.info("Created SCM DB handle from snapshot at {}.", - dbFile.getAbsolutePath()); - } catch (IOException ioEx) { - LOG.error("Unable to initialize Recon SCM DB snapshot store.", ioEx); + FileUtils.moveDirectory(dbFile, newDb); + LOG.info("SCM snapshot moved to Recon DB path {}.", + newDb.getAbsolutePath()); } + + final DBStore newStore = DBStoreBuilder.newBuilder( + ozoneConfiguration, ReconSCMDBDefinition.get(), newDb).build(); + final Table newNodeTable = + ReconSCMDBDefinition.NODES.getTable(newStore); + for (Map.Entry entry : existingNodes.entrySet()) { + 
newNodeTable.put(entry.getKey(), entry.getValue()); + } + + sequenceIdGen.reinitialize( + ReconSCMDBDefinition.SEQUENCE_ID.getTable(newStore)); + pipelineManager.reinitialize( + ReconSCMDBDefinition.PIPELINES.getTable(newStore)); + containerManager.reinitialize( + ReconSCMDBDefinition.CONTAINERS.getTable(newStore)); + nodeManager.reinitialize( + ReconSCMDBDefinition.NODES.getTable(newStore)); + dbStore = newStore; + LOG.info("Created SCM DB handle from snapshot at {}.", + newDb.getAbsolutePath()); } @Override diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerSyncHelper.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerSyncHelper.java index c8d940aa8357..a8bf07c9f9cc 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerSyncHelper.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerSyncHelper.java @@ -19,25 +19,83 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH; import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_MAXIMUM_DATA_LENGTH_DEFAULT; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.CLEANUP; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.CLOSE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.DELETE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.FORCE_CLOSE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.QUASI_CLOSE; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_ID_BATCH_SIZE; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_ID_BATCH_SIZE_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_THRESHOLD; +import static 
org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_THRESHOLD_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_DELETED_CONTAINER_CHECK_BATCH_SIZE; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_DELETED_CONTAINER_CHECK_BATCH_SIZE_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD_DEFAULT; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; +import org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException; import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * Helper class that performs targeted incremental sync between SCM and Recon + * container metadata. Executes four passes per sync cycle: + * + *

    + *
  1. Pass 1 — CLOSED (SCM-driven, add + correct): fetches SCM's + * CLOSED container ID list, adds any absent from Recon, and corrects + * containers that are OPEN or CLOSING in Recon but CLOSED in SCM.
  2. + *
  3. Pass 2 — OPEN (SCM-driven, add only): adds OPEN containers + * that are absent from Recon entirely (e.g., created while Recon was + * down).
  4. + *
  5. Pass 3 — QUASI_CLOSED (SCM-driven, add only): adds + * QUASI_CLOSED containers absent from Recon. Requires that SCM returns + * container metadata with a null pipeline when pipeline lookup fails, and + * that Recon's {@code addNewContainer} handles a null pipeline gracefully; + * otherwise QUASI_CLOSED containers whose pipelines have been cleaned up + * will fail with {@code NullPointerException} or {@code IOException}.
  6. + *
  7. Pass 4 — DELETED retirement (Recon-driven, transition only): + * scans Recon's CLOSED and QUASI_CLOSED containers in batches, queries + * SCM for each, and transitions any that SCM reports as DELETED. + * Intentionally Recon-driven (not SCM-driven) because SCM's DELETED + * list grows unboundedly; starting from Recon's bounded set of + * non-terminal containers is always more efficient.
  8. + *
+ */ class ReconStorageContainerSyncHelper { // Serialized size of one ContainerID proto on the wire (varint tag + 8-byte long = ~12 bytes). // Used to derive the maximum batch size that fits within ipc.maximum.data.length. private static final long CONTAINER_ID_PROTO_SIZE_BYTES = 12; + /** + * Rotating cursor for Pass 4 (DELETED retirement). Tracks the list position + * where the next sync cycle should begin so that all candidates are + * eventually covered regardless of batch size. Volatile because it is + * updated by the scheduler thread and read by tests. + */ + private volatile int pass4BatchOffset = 0; + /** + * Monotonic cursor for Pass 2 (OPEN add-only sync). OPEN containers are + * created with increasing container IDs, so each cycle only needs to scan + * from the last-seen ID onward rather than rescanning the full OPEN set. + */ + private volatile long pass2OpenStartContainerId = 1L; + private static final Logger LOG = LoggerFactory .getLogger(ReconStorageContainerSyncHelper.class); @@ -45,6 +103,32 @@ class ReconStorageContainerSyncHelper { private final OzoneConfiguration ozoneConfiguration; private final ReconContainerManager containerManager; + /** + * Describes the action that the periodic scheduler should take based on the + * observed drift between SCM and Recon container metadata. + */ + public enum SyncAction { + /** + * No drift detected — no sync work needed this cycle. + */ + NO_ACTION, + + /** + * Small or per-state drift detected — run the four-pass targeted sync. + * This is the normal steady-state response: cheaper than a full snapshot + * and sufficient for the vast majority of drift scenarios. + */ + TARGETED_SYNC, + + /** + * Large total-count drift detected — replace Recon's entire SCM DB with a + * fresh checkpoint from SCM. Reserved for cases where targeted sync would + * be unreliable (e.g., Recon was down for hours and hundreds of containers + * changed state). 
+ */ + FULL_SNAPSHOT + } + ReconStorageContainerSyncHelper(StorageContainerServiceProvider scmServiceProvider, OzoneConfiguration ozoneConfiguration, ReconContainerManager containerManager) { @@ -53,52 +137,580 @@ class ReconStorageContainerSyncHelper { this.containerManager = containerManager; } + /** + * Decides what sync action the periodic scheduler should take based on the + * observed drift between SCM and Recon. + * + *

Decision logic: + *

    + *
  1. If {@code |(SCM_total - SCM_open) - (Recon_total - Recon_open)| > + * ozone.recon.scm.container.threshold} (default 10,000): return + * {@link SyncAction#FULL_SNAPSHOT}. Large drift in non-OPEN containers + * means Recon is badly behind on stable SCM state and a full checkpoint + * replacement is cheaper and more reliable at that scale.
  2. + *
  3. If total drift is positive but the non-OPEN drift is at or below the + * threshold: return {@link SyncAction#TARGETED_SYNC}. This keeps large + * OPEN-only gaps on the incremental path because missing OPEN + * containers can be repaired cheaply without replacing the full SCM DB.
  4. + *
  5. If total drift is zero, check per-state drift for each active + * (non-terminal) lifecycle state against + * {@code ozone.recon.scm.per.state.drift.threshold} (default 5): + *
      + *
    • OPEN: detects containers stuck OPEN in Recon after SCM + * has advanced them to QUASI_CLOSED or CLOSED.
    • + *
    • QUASI_CLOSED: detects containers stuck QUASI_CLOSED in + * Recon after SCM has advanced them to CLOSED. This case produces + * zero OPEN drift and is invisible to an OPEN-only check.
    • + *
    + * If drift in any checked state exceeds the threshold: + * return {@link SyncAction#TARGETED_SYNC}.
  6. + *
  7. Otherwise: return {@link SyncAction#NO_ACTION}.
  8. + *
+ * + *

Per-state drift deliberately routes to targeted sync, not a full + * snapshot — the targeted sync's per-state passes correct each condition + * efficiently without replacing the entire database. + * + * @return the recommended {@link SyncAction} + * @throws IOException if SCM RPC calls to retrieve counts fail + */ + public SyncAction decideSyncAction() throws IOException { + int largeThreshold = ozoneConfiguration.getInt( + OZONE_RECON_SCM_CONTAINER_THRESHOLD, + OZONE_RECON_SCM_CONTAINER_THRESHOLD_DEFAULT); + int perStateDriftThreshold = ozoneConfiguration.getInt( + OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD, + OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD_DEFAULT); + List reconContainers = containerManager.getContainers(); + long reconTotal = reconContainers.size(); + long reconOpen = reconContainers.stream() + .filter(c -> c.getState() == HddsProtos.LifeCycleState.OPEN) + .count(); + + // --- Check 1: large non-OPEN drift escalates to full snapshot --- + long scmTotal = scmServiceProvider.getContainerCount(); + long scmOpen = scmServiceProvider.getContainerCount(HddsProtos.LifeCycleState.OPEN); + long totalDrift = Math.abs(scmTotal - reconTotal); + long scmNonOpen = Math.max(0, scmTotal - scmOpen); + long reconNonOpen = Math.max(0, reconTotal - reconOpen); + long nonOpenDrift = Math.abs(scmNonOpen - reconNonOpen); + + if (nonOpenDrift > largeThreshold) { + LOG.warn("Non-OPEN container drift {} exceeds threshold {} " + + "(SCM_non_OPEN={}, Recon_non_OPEN={}, SCM_total={}, Recon_total={}). " + + "Triggering full snapshot.", + nonOpenDrift, largeThreshold, scmNonOpen, reconNonOpen, scmTotal, reconTotal); + return SyncAction.FULL_SNAPSHOT; + } + if (totalDrift > 0) { + LOG.info("Total container drift {} detected (SCM={}, Recon={}). 
" + + "Non-OPEN drift is {} (SCM_non_OPEN={}, Recon_non_OPEN={}), so " + + "using targeted sync.", + totalDrift, scmTotal, reconTotal, nonOpenDrift, scmNonOpen, reconNonOpen); + return SyncAction.TARGETED_SYNC; + } + + // --- Check 2: per-state drift (total drift = 0, lifecycle state may lag) --- + // + // These checks intentionally use the lightweight per-state count RPCs so + // the decision path remains cheap. CLOSED is derived as the remainder after + // subtracting OPEN and QUASI_CLOSED from the total on each side. + long scmQuasiClosed = + scmServiceProvider.getContainerCount(HddsProtos.LifeCycleState.QUASI_CLOSED); + long reconQuasiClosed = reconContainers.stream() + .filter(c -> c.getState() == HddsProtos.LifeCycleState.QUASI_CLOSED) + .count(); + long scmClosed = Math.max(0, scmTotal - scmOpen - scmQuasiClosed); + long reconClosed = Math.max(0, reconTotal - reconOpen - reconQuasiClosed); + + for (Object[] entry : new Object[][]{ + {HddsProtos.LifeCycleState.OPEN, scmOpen, reconOpen}, + {HddsProtos.LifeCycleState.QUASI_CLOSED, scmQuasiClosed, reconQuasiClosed}, + {HddsProtos.LifeCycleState.CLOSED, scmClosed, reconClosed}}) { + HddsProtos.LifeCycleState state = (HddsProtos.LifeCycleState) entry[0]; + long scmCount = (long) entry[1]; + long reconCount = (long) entry[2]; + long drift = Math.abs(scmCount - reconCount); + if (drift > perStateDriftThreshold) { + LOG.info("Per-state {} drift {} detected (SCM_{}={}, Recon_{}={}, threshold={}). " + + "Total counts are equal — targeted sync will correct stale states.", + state, drift, state, scmCount, state, reconCount, perStateDriftThreshold); + return SyncAction.TARGETED_SYNC; + } + } + + LOG.info("No significant drift detected (total drift={}). No sync needed.", totalDrift); + return SyncAction.NO_ACTION; + } + + /** + * Runs all four sync passes and returns {@code true} if all passes completed + * without a fatal error. 
+ */ public boolean syncWithSCMContainerInfo() { + boolean pass1 = syncClosedContainers(); + boolean pass2 = syncOpenContainersIncrementally(); + boolean pass3 = syncQuasiClosedContainers(); + boolean pass4 = retireDeletedContainers(); + return pass1 && pass2 && pass3 && pass4; + } + + // --------------------------------------------------------------------------- + // Pass 1: CLOSED containers — add missing, correct stale OPEN/CLOSING state + // --------------------------------------------------------------------------- + + /** + * Fetches SCM's full CLOSED container ID list (paginated) and for each entry: + *

    + *
  • If absent from Recon: calls {@code addNewContainer()}.
  • + *
  • If present in Recon as OPEN or CLOSING: advances to CLOSED + * via the appropriate lifecycle events.
  • + *
  • If already CLOSED (or past): no action.
  • + *
+ */ + private boolean syncClosedContainers() { try { - long totalContainerCount = scmServiceProvider.getContainerCount( + long totalClosed = scmServiceProvider.getContainerCount( HddsProtos.LifeCycleState.CLOSED); - long containerCountPerCall = - getContainerCountPerCall(totalContainerCount); + if (totalClosed == 0) { + LOG.debug("No CLOSED containers found in SCM."); + return true; + } + ContainerID startContainerId = ContainerID.valueOf(1); - long retrievedContainerCount = 0; - if (totalContainerCount > 0) { - while (retrievedContainerCount < totalContainerCount) { - List listOfContainers = scmServiceProvider. - getListOfContainerIDs(startContainerId, - Long.valueOf(containerCountPerCall).intValue(), - HddsProtos.LifeCycleState.CLOSED); - if (null != listOfContainers && !listOfContainers.isEmpty()) { - LOG.info("Got list of containers from SCM : {}", listOfContainers.size()); - listOfContainers.forEach(containerID -> { - boolean isContainerPresentAtRecon = containerManager.containerExist(containerID); - if (!isContainerPresentAtRecon) { - try { - ContainerWithPipeline containerWithPipeline = - scmServiceProvider.getContainerWithPipeline( - containerID.getId()); - containerManager.addNewContainer(containerWithPipeline); - } catch (IOException e) { - LOG.error("Could not get container with pipeline " + - "for container : {}", containerID); - } + long retrieved = 0; + + while (retrieved < totalClosed) { + List batch = getContainerIDsByState( + startContainerId, HddsProtos.LifeCycleState.CLOSED); + if (batch == null || batch.isEmpty()) { + LOG.warn("Pass 1 (CLOSED): SCM reported {} CLOSED containers, but " + + "returned an empty batch after {} were retrieved.", totalClosed, retrieved); + return false; + } + + LOG.info("Pass 1 (CLOSED): processing batch of {} containers.", batch.size()); + for (ContainerID containerID : batch) { + processSyncedClosedContainer(containerID); + } + + long lastID = batch.get(batch.size() - 1).getId(); + startContainerId = 
ContainerID.valueOf(lastID + 1); + retrieved += batch.size(); + } + + LOG.info("Pass 1 (CLOSED): sync complete, checked {} containers.", retrieved); + return true; + } catch (Exception e) { + LOG.error("Pass 1 (CLOSED): unexpected error during sync.", e); + return false; + } + } + + /** + * Processes a single container ID from SCM's CLOSED list: + * adds it to Recon if absent, or corrects its state if stale. + */ + private void processSyncedClosedContainer(ContainerID containerID) { + if (!containerManager.containerExist(containerID)) { + // Container completely absent from Recon — add it. + // Use the batch API instead of the individual getContainerWithPipeline: the batch API + // has a null-pipeline fallback that returns the container even when the pipeline lookup + // fails (e.g., pipeline cleaned up on SCM side or createPipelineForRead fails for + // containers with 0 replicas). The individual call throws IOException in those cases + // and silently skips the container, leaving it permanently absent from Recon. + List cwpList = + scmServiceProvider.getExistContainerWithPipelinesInBatch( + Collections.singletonList(containerID.getId())); + if (cwpList.isEmpty()) { + LOG.warn("Pass 1 (CLOSED): container {} not returned by SCM; skipping.", containerID); + return; + } + try { + containerManager.addNewContainer(cwpList.get(0)); + LOG.info("Pass 1 (CLOSED): added missing container {}.", containerID); + } catch (IOException e) { + LOG.error("Pass 1 (CLOSED): could not add missing container {}.", containerID, e); + } + return; + } + + // Container exists in Recon — check if its state is stale. + try { + ContainerInfo reconContainer = containerManager.getContainer(containerID); + HddsProtos.LifeCycleState reconState = reconContainer.getState(); + + if (reconState == HddsProtos.LifeCycleState.OPEN) { + LOG.info("Pass 1 (CLOSED): container {} is OPEN in Recon but CLOSED in SCM. 
" + + "Correcting state.", containerID); + // OPEN → CLOSING; transitionOpenToClosing also decrements pipelineToOpenContainer + // so the Node API's open-container-per-pipeline count stays accurate. + containerManager.transitionOpenToClosing(containerID, reconContainer); + reconState = HddsProtos.LifeCycleState.CLOSING; + } + + if (reconState == HddsProtos.LifeCycleState.CLOSING) { + // CLOSING → CLOSED (CLOSE is idempotent at CLOSED and beyond) + containerManager.updateContainerState(containerID, CLOSE); + LOG.info("Pass 1 (CLOSED): container {} corrected from CLOSING to CLOSED.", containerID); + reconState = HddsProtos.LifeCycleState.CLOSED; + } + + if (reconState == HddsProtos.LifeCycleState.QUASI_CLOSED) { + // QUASI_CLOSED → CLOSED: SCM has already completed the quorum decision + // (the container is definitively CLOSED in SCM), so Recon should + // reflect that. FORCE_CLOSE is the only valid event for this transition. + containerManager.updateContainerState(containerID, FORCE_CLOSE); + LOG.info("Pass 1 (CLOSED): container {} corrected from QUASI_CLOSED to CLOSED " + + "via FORCE_CLOSE.", containerID); + } + } catch (ContainerNotFoundException e) { + LOG.warn("Pass 1 (CLOSED): container {} vanished from Recon between existence " + + "check and state read.", containerID, e); + } catch (InvalidStateTransitionException | IOException e) { + LOG.warn("Pass 1 (CLOSED): failed to correct state for container {}.", containerID, e); + } + } + + // --------------------------------------------------------------------------- + // Pass 2 / Pass 3: Add-only sync for OPEN and QUASI_CLOSED containers + // --------------------------------------------------------------------------- + + /** + * Fetches only the newly created OPEN containers from SCM, starting at the + * last-seen OPEN container ID from the previous cycle, and adds any that are + * absent from Recon. + * + *

This deliberately avoids rescanning the full OPEN set every cycle. + * OPEN container IDs are monotonic, so once Recon has scanned through a + * given ID range it can continue from the next ID in later cycles. This + * keeps OPEN drift on an incremental path while CLOSED/QUASI_CLOSED still use + * full state scans for correction. + */ + private boolean syncOpenContainersIncrementally() { + try { + long totalOpen = scmServiceProvider.getContainerCount(HddsProtos.LifeCycleState.OPEN); + if (totalOpen == 0) { + LOG.debug("Pass 2 (OPEN): no containers found in SCM."); + return true; + } + + long retrieved = 0; + int addedCount = 0; + long batchSize = Math.min(totalOpen, getStatePaginationBatchSize()); + ContainerID startContainerId = ContainerID.valueOf(pass2OpenStartContainerId); + + while (true) { + List batch = scmServiceProvider.getListOfContainerIDs( + startContainerId, (int) batchSize, HddsProtos.LifeCycleState.OPEN); + if (batch == null || batch.isEmpty()) { + LOG.info("Pass 2 (OPEN): sync complete from cursor {}, checked {}, added {}.", + pass2OpenStartContainerId, retrieved, addedCount); + return true; + } + + addedCount += addMissingContainersForState(batch, HddsProtos.LifeCycleState.OPEN); + retrieved += batch.size(); + + long lastID = batch.get(batch.size() - 1).getId(); + pass2OpenStartContainerId = lastID + 1; + startContainerId = ContainerID.valueOf(pass2OpenStartContainerId); + } + } catch (Exception e) { + LOG.error("Pass 2 (OPEN): unexpected error during sync.", e); + return false; + } + } + + private int addMissingContainersForState(List batch, + HddsProtos.LifeCycleState state) { + // Collect all missing container IDs in this page and fetch them in one + // batch RPC. The batch API has a null-pipeline fallback: if a pipeline + // lookup fails (e.g., pipeline not yet OPEN or cleaned up), SCM still + // returns the container with pipeline=null so Recon can record it. 
+ List missingIds = new ArrayList<>(); + for (ContainerID containerID : batch) { + if (!containerManager.containerExist(containerID)) { + missingIds.add(containerID.getId()); + } + } + if (missingIds.isEmpty()) { + return 0; + } + + int addedCount = 0; + List cwpList = + scmServiceProvider.getExistContainerWithPipelinesInBatch(missingIds); + for (ContainerWithPipeline cwp : cwpList) { + try { + containerManager.addNewContainer(cwp); + addedCount++; + LOG.info("Pass ({}): added missing container {}.", state, + cwp.getContainerInfo().getContainerID()); + } catch (IOException e) { + LOG.error("Pass ({}): could not add missing container {}.", state, + cwp.getContainerInfo().getContainerID(), e); + } + } + return addedCount; + } + + // --------------------------------------------------------------------------- + // Pass 3 (extended): QUASI_CLOSED — add missing containers and correct + // containers whose state has lagged behind SCM. + // --------------------------------------------------------------------------- + + /** + * Fetches SCM's full QUASI_CLOSED container ID list (paginated) and for + * each entry: + *

+ * <ul>
+ *   <li>If absent from Recon: calls {@code addNewContainer()}.</li>
+ *   <li>If present in Recon as OPEN: advances via FINALIZE → QUASI_CLOSE.</li>
+ *   <li>If present in Recon as CLOSING: advances via QUASI_CLOSE.</li>
+ *   <li>If already QUASI_CLOSED (or past): no action.</li>
+ * </ul>
+ *

Correcting OPEN/CLOSING → QUASI_CLOSED handles the case where Recon + * missed the QUASI_CLOSE transition while it was down or lagging. Without + * this correction the drift check in {@link #decideSyncAction()} could + * detect QUASI_CLOSED count drift but the add-only pass would never fix it + * (the container already exists in Recon, just in the wrong state). + */ + private boolean syncQuasiClosedContainers() { + try { + long totalQuasiClosed = scmServiceProvider.getContainerCount( + HddsProtos.LifeCycleState.QUASI_CLOSED); + if (totalQuasiClosed == 0) { + LOG.debug("Pass 3 (QUASI_CLOSED): no containers found in SCM."); + return true; + } + + ContainerID startContainerId = ContainerID.valueOf(1); + long retrieved = 0; + int addedCount = 0; + int correctedCount = 0; + + while (retrieved < totalQuasiClosed) { + List batch = getContainerIDsByState( + startContainerId, HddsProtos.LifeCycleState.QUASI_CLOSED); + if (batch == null || batch.isEmpty()) { + LOG.warn("Pass 3 (QUASI_CLOSED): SCM reported {} containers, but " + + "returned an empty batch after {} were retrieved.", + totalQuasiClosed, retrieved); + return false; + } + + for (ContainerID containerID : batch) { + if (!containerManager.containerExist(containerID)) { + // Use the batch API with null-pipeline fallback (see Pass 2 comment). 
+ List cwpList = + scmServiceProvider.getExistContainerWithPipelinesInBatch( + Collections.singletonList(containerID.getId())); + if (cwpList.isEmpty()) { + LOG.warn("Pass 3 (QUASI_CLOSED): container {} not returned by SCM; skipping.", + containerID); + } else { + try { + containerManager.addNewContainer(cwpList.get(0)); + addedCount++; + LOG.info("Pass 3 (QUASI_CLOSED): added missing container {}.", containerID); + } catch (IOException e) { + LOG.error("Pass 3 (QUASI_CLOSED): could not add missing container {}.", + containerID, e); } - }); - long lastID = listOfContainers.get(listOfContainers.size() - 1).getId(); - startContainerId = ContainerID.valueOf(lastID + 1); + } } else { - LOG.info("No containers found at SCM in CLOSED state"); - return false; + // Container exists — correct if its state is behind QUASI_CLOSED. + try { + ContainerInfo reconContainer = containerManager.getContainer(containerID); + HddsProtos.LifeCycleState reconState = reconContainer.getState(); + + if (reconState == HddsProtos.LifeCycleState.OPEN) { + // Use transitionOpenToClosing to keep pipelineToOpenContainer accurate. + containerManager.transitionOpenToClosing(containerID, reconContainer); + reconState = HddsProtos.LifeCycleState.CLOSING; + LOG.info("Pass 3 (QUASI_CLOSED): container {} advanced OPEN → CLOSING.", + containerID); + } + if (reconState == HddsProtos.LifeCycleState.CLOSING) { + containerManager.updateContainerState(containerID, QUASI_CLOSE); + correctedCount++; + LOG.info("Pass 3 (QUASI_CLOSED): container {} corrected to QUASI_CLOSED.", + containerID); + } + // Already QUASI_CLOSED (or past): no action needed. 
+ } catch (ContainerNotFoundException e) { + LOG.warn("Pass 3 (QUASI_CLOSED): container {} vanished from Recon between " + + "existence check and state read.", containerID, e); + } catch (InvalidStateTransitionException | IOException e) { + LOG.warn("Pass 3 (QUASI_CLOSED): failed to correct state for container {}.", + containerID, e); + } } - retrievedContainerCount += containerCountPerCall; } + + long lastID = batch.get(batch.size() - 1).getId(); + startContainerId = ContainerID.valueOf(lastID + 1); + retrieved += batch.size(); } + + LOG.info("Pass 3 (QUASI_CLOSED): sync complete, checked {}, added {}, corrected {}.", + retrieved, addedCount, correctedCount); + return true; + } catch (IOException e) { + LOG.error("Pass 3 (QUASI_CLOSED): unexpected error during sync.", e); + return false; + } + } + + // --------------------------------------------------------------------------- + // Pass 4: DELETED retirement — Recon-driven, transition only, never "add" + // --------------------------------------------------------------------------- + + /** + * Retires containers that SCM has marked as DELETED but Recon still holds in + * a non-terminal state (CLOSED or QUASI_CLOSED). + * + *

+ * <p>Why Recon-driven (not SCM-driven): SCM's DELETED list grows
+ * unboundedly over the lifetime of a cluster. Fetching the full DELETED list
+ * and diffing against Recon would be O(SCM_DELETED_total) — potentially
+ * millions of entries. Starting from Recon's bounded set of non-terminal
+ * containers and querying SCM for each is always cheaper.
+ *
+ * <p>Batching: containers are queried in batches of
+ * {@code ozone.recon.scm.deleted.container.check.batch.size} (default 500)
+ * to avoid overwhelming SCM with individual RPCs during a single sync cycle.
+ * Containers not checked in this cycle are deferred to the next.
+ *
What this does NOT do: this pass never adds new containers to + * Recon. It only drives the lifecycle state forward to DELETED for containers + * that Recon already knows about. + * + * @return {@code true} if the pass completed without fatal error + */ + private boolean retireDeletedContainers() { + try { + // Collect Recon's non-terminal containers (CLOSED and QUASI_CLOSED). + // These are the only states from which SCM can reach DELETED. + List candidates = containerManager.getContainers().stream() + .filter(c -> c.getState() == HddsProtos.LifeCycleState.CLOSED + || c.getState() == HddsProtos.LifeCycleState.QUASI_CLOSED) + .collect(Collectors.toList()); + + if (candidates.isEmpty()) { + LOG.debug("Pass 4 (DELETED retirement): no CLOSED/QUASI_CLOSED containers in Recon."); + return true; + } + + int batchSize = ozoneConfiguration.getInt( + OZONE_RECON_SCM_DELETED_CONTAINER_CHECK_BATCH_SIZE, + OZONE_RECON_SCM_DELETED_CONTAINER_CHECK_BATCH_SIZE_DEFAULT); + + // --- Gap 4 fix: rotating offset ensures every candidate is eventually + // visited even when candidates.size() >> batchSize. --- + int total = candidates.size(); + int start = pass4BatchOffset % total; + int end = Math.min(start + batchSize, total); + List batch = candidates.subList(start, end); + // Advance the cursor; wrap to 0 when we have covered the full list. + pass4BatchOffset = (end >= total) ? 0 : end; + + // --- Gap 6 fix: one batch RPC instead of N individual RPCs. --- + // getExistContainerWithPipelinesInBatch() returns only containers that + // still exist in SCM; containers absent from the result were purged. + List batchIds = batch.stream() + .map(c -> c.containerID().getId()) + .collect(Collectors.toList()); + List existingInSCM = + scmServiceProvider.getExistContainerWithPipelinesInBatch(batchIds); + if (existingInSCM == null) { + LOG.warn("Pass 4 (DELETED retirement): SCM batch lookup returned null " + + "for {} candidate containers. 
Skipping retirement this cycle.", batchIds.size()); + return true; + } + if (existingInSCM.isEmpty()) { + LOG.warn("Pass 4 (DELETED retirement): SCM batch lookup returned an " + + "empty result for {} candidate containers. Treating this as " + + "ambiguous/unavailable and skipping retirement this cycle.", batchIds.size()); + return true; + } + + // Build a lookup map: containerID (long) → SCM lifecycle state. + Map scmStateMap = new HashMap<>(); + for (ContainerWithPipeline cwp : existingInSCM) { + scmStateMap.put(cwp.getContainerInfo().getContainerID(), + cwp.getContainerInfo().getState()); + } + + int retiredCount = 0; + int checked = 0; + for (ContainerInfo container : batch) { + ContainerID containerID = container.containerID(); + checked++; + HddsProtos.LifeCycleState scmState = scmStateMap.get(containerID.getId()); + + if (scmState == null) { + // Container absent from SCM batch result — it was purged entirely. + LOG.warn("Pass 4 (DELETED retirement): container {} not found in SCM " + + "(may have been purged). Transitioning to DELETED in Recon.", containerID); + retireContainerToDeleted(containerID, container.getState(), + HddsProtos.LifeCycleState.DELETED); + retiredCount++; + } else if (scmState == HddsProtos.LifeCycleState.DELETING + || scmState == HddsProtos.LifeCycleState.DELETED) { + retireContainerToDeleted(containerID, container.getState(), scmState); + retiredCount++; + } + } + + LOG.info("Pass 4 (DELETED retirement): offset={}, checked={}, retired={}, " + + "total_candidates={}.", + start, checked, retiredCount, total); + return true; } catch (Exception e) { - LOG.error("Unable to refresh Recon SCM DB Snapshot. ", e); + LOG.error("Pass 4 (DELETED retirement): unexpected error.", e); return false; } - return true; } + /** + * Drives a container in Recon from its current {@code reconState} forward + * to DELETED, applying only the transitions valid from that state. + * + *

+ * <p>State machine:
+ * <pre>
+ *   CLOSED       → DELETING (DELETE) → DELETED (CLEANUP)
+ *   QUASI_CLOSED → DELETING (DELETE) → DELETED (CLEANUP)
+ * </pre>
+ *

All transitions used here are idempotent beyond their target state, + * so repeated invocations are safe. + */ + private void retireContainerToDeleted(ContainerID containerID, + HddsProtos.LifeCycleState reconState, + HddsProtos.LifeCycleState scmState) { + try { + // Both CLOSED and QUASI_CLOSED support DELETE → DELETING + containerManager.updateContainerState(containerID, DELETE); + // DELETING → DELETED only when SCM has fully completed deletion + if (scmState == HddsProtos.LifeCycleState.DELETED) { + containerManager.updateContainerState(containerID, CLEANUP); + LOG.info("Pass 4 (DELETED retirement): container {} transitioned " + + "{} → DELETED in Recon.", containerID, reconState); + } else { + LOG.info("Pass 4 (DELETED retirement): container {} transitioned " + + "{} → DELETING in Recon (SCM is still DELETING).", containerID, reconState); + } + } catch (InvalidStateTransitionException | IOException e) { + LOG.warn("Pass 4 (DELETED retirement): failed to retire container {} " + + "from {} toward DELETED.", containerID, reconState, e); + } + } + + // --------------------------------------------------------------------------- + // Batch size utility + // --------------------------------------------------------------------------- + private long getContainerCountPerCall(long totalContainerCount) { long hadoopRPCSize = ozoneConfiguration.getInt( IPC_MAXIMUM_DATA_LENGTH, IPC_MAXIMUM_DATA_LENGTH_DEFAULT); @@ -110,4 +722,27 @@ private long getContainerCountPerCall(long totalContainerCount) { long batchSize = Math.min(countByRpcLimit, countByBatchLimit); return Math.min(totalContainerCount, batchSize); } + + /** + * Uses the state-filtered container-ID list RPC as the source of truth for + * targeted sync pagination, while the state-aware count RPC is used only to + * avoid unnecessary list calls when SCM has no containers in the state. 
+ */ + private List getContainerIDsByState( + ContainerID startContainerId, + HddsProtos.LifeCycleState state) throws IOException { + long stateTotal = scmServiceProvider.getContainerCount(state); + if (stateTotal == 0) { + return Collections.emptyList(); + } + long batchSize = stateTotal > 0 + ? getContainerCountPerCall(stateTotal) + : getStatePaginationBatchSize(); + return scmServiceProvider.getListOfContainerIDs( + startContainerId, (int) batchSize, state); + } + + private long getStatePaginationBatchSize() { + return getContainerCountPerCall(Long.MAX_VALUE); + } } diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestTriggerDBSyncEndpoint.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestTriggerDBSyncEndpoint.java index da7edc620f32..b0cbccf7d120 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestTriggerDBSyncEndpoint.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestTriggerDBSyncEndpoint.java @@ -24,6 +24,7 @@ import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_OM_SNAPSHOT_DB_DIR; import static org.apache.hadoop.ozone.recon.ReconUtils.createTarFile; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyBoolean; import static org.mockito.Mockito.anyString; @@ -98,6 +99,7 @@ public void setUp() throws IOException, AuthenticationException { ReconUtils reconUtilsMock = mock(ReconUtils.class); + when(reconUtilsMock.getReconDbDir(any(), anyString())).thenCallRealMethod(); ReconTaskStatusDao reconTaskStatusDaoMock = mock(ReconTaskStatusDao.class); ReconTaskStatusUpdaterManager taskStatusUpdaterManagerMock = mock(ReconTaskStatusUpdaterManager.class); @@ -151,4 +153,24 @@ public void testTriggerDBSyncEndpointWithOM() { assertEquals(200, response.getStatus()); assertEquals(true, 
response.getEntity()); } + + /** + * Verifies that {@code POST /api/v1/triggerdbsync/scm} can be invoked and + * returns HTTP 200 with a boolean result. + * + *

In the test environment the Recon SCM facade is wired up against a + * mini in-memory cluster, so the four-pass targeted sync may return + * {@code false} (e.g., empty SCM state). The test only asserts that the + * endpoint is reachable and that the response entity is a boolean, which + * is sufficient to verify wiring and the HTTP contract. + */ + @Test + public void testTriggerSCMContainerSync() { + TriggerDBSyncEndpoint triggerDBSyncEndpoint + = reconTestInjector.getInstance(TriggerDBSyncEndpoint.class); + Response response = triggerDBSyncEndpoint.triggerSCMContainerSync(); + assertEquals(200, response.getStatus()); + assertNotNull(response.getEntity()); + assertEquals(Boolean.class, response.getEntity().getClass()); + } } diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/persistence/TestUnhealthyContainersDerbyPerformance.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/persistence/TestUnhealthyContainersDerbyPerformance.java index 5cc90e88409f..239182e3bf83 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/persistence/TestUnhealthyContainersDerbyPerformance.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/persistence/TestUnhealthyContainersDerbyPerformance.java @@ -593,7 +593,45 @@ public void testAtomicReplaceDeleteAndInsertInSingleTransaction() { } // ----------------------------------------------------------------------- - // Test 8 — Batch DELETE performance for 1M records + // Test 8 — Large IN-clause read must be internally chunked + // ----------------------------------------------------------------------- + + /** + * Verifies that loading existing in-state-since values for a large set of + * container IDs does not generate a single oversized Derby statement. + * + *

+ * <p>This regression test covers the read path used by
+ * {@link org.apache.hadoop.ozone.recon.fsck.ContainerHealthTask} while it
+ * preserves {@code in_state_since} values across scan cycles. Before
+ * internal chunking, passing a large ID list here caused Derby to fail with
+ * {@code ERROR 42ZA0: Statement too complex} and
+ * {@code constant_pool > 65535} during statement compilation.
+ */ + @Test + @Order(8) + public void testExistingInStateSinceLookupChunksLargeContainerIdList() { + int lookupCount = 20_000; + int expectedRecords = lookupCount * STATE_COUNT; + List containerIds = new ArrayList<>(lookupCount); + + for (long id = 1; id <= lookupCount; id++) { + containerIds.add(id); + } + + long start = System.nanoTime(); + Map existing = + schemaManager.getExistingInStateSinceByContainerIds(containerIds); + long elapsedMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start); + + LOG.info("Large in-state-since lookup complete: {} container IDs -> {} rows in {} ms", + lookupCount, existing.size(), elapsedMs); + + assertEquals(expectedRecords, existing.size(), + "Lookup should return one record per existing container/state pair"); + } + + // ----------------------------------------------------------------------- + // Test 9 — Batch DELETE performance for 1M records // ----------------------------------------------------------------------- /** @@ -615,7 +653,7 @@ public void testAtomicReplaceDeleteAndInsertInSingleTransaction() { * all read-only tests.

*/ @Test - @Order(8) + @Order(9) public void testBatchDeletePerformanceOneMillionRecords() { int deleteCount = CONTAINER_ID_RANGE; // 200 000 container IDs int expectedDeleted = deleteCount * STATE_COUNT; // 1 000 000 rows @@ -623,7 +661,7 @@ public void testBatchDeletePerformanceOneMillionRecords() { int internalChunks = (int) Math.ceil( (double) deleteCount / DELETE_CHUNK_SIZE); - LOG.info("--- Test 8: Batch DELETE — {} IDs × {} states = {} rows " + LOG.info("--- Test 9: Batch DELETE — {} IDs × {} states = {} rows " + "({} internal SQL statements of {} IDs) ---", deleteCount, STATE_COUNT, expectedDeleted, internalChunks, DELETE_CHUNK_SIZE); @@ -659,17 +697,17 @@ public void testBatchDeletePerformanceOneMillionRecords() { } // ----------------------------------------------------------------------- - // Test 9 — Re-read counts after full delete + // Test 10 — Re-read counts after full delete // ----------------------------------------------------------------------- /** * After full delete, verifies that each state has 0 records. 
*/ @Test - @Order(9) + @Order(10) public void testCountByStateAfterFullDelete() { int expectedPerState = 0; - LOG.info("--- Test 9: COUNT by state after full delete (expected {} each) ---", + LOG.info("--- Test 10: COUNT by state after full delete (expected {} each) ---", expectedPerState); DSLContext dsl = schemaDefinition.getDSLContext(); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/AbstractReconContainerManagerTest.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/AbstractReconContainerManagerTest.java index 33e20413bfd6..81b52a6e5d21 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/AbstractReconContainerManagerTest.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/AbstractReconContainerManagerTest.java @@ -155,6 +155,30 @@ private StorageContainerServiceProvider getScmServiceProvider() ContainerWithPipeline containerWithPipeline = new ContainerWithPipeline(containerInfo, pipeline); + ContainerInfo closedContainerInfo = + new ContainerInfo.Builder() + .setContainerID(101L) + .setNumberOfKeys(10) + .setPipelineID(pipeline.getId()) + .setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE)) + .setOwner("test") + .setState(LifeCycleState.CLOSED) + .build(); + ContainerWithPipeline closedContainerWithPipeline = + new ContainerWithPipeline(closedContainerInfo, pipeline); + + ContainerInfo quasiClosedContainerInfo = + new ContainerInfo.Builder() + .setContainerID(102L) + .setNumberOfKeys(10) + .setPipelineID(pipeline.getId()) + .setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE)) + .setOwner("test") + .setState(LifeCycleState.QUASI_CLOSED) + .build(); + ContainerWithPipeline quasiClosedContainerWithPipeline = + new ContainerWithPipeline(quasiClosedContainerInfo, pipeline); + List containerList = new LinkedList<>(); List verifiedContainerPipeline = new LinkedList<>(); @@ -182,6 +206,10 @@ private StorageContainerServiceProvider 
getScmServiceProvider() StorageContainerServiceProvider.class); when(scmServiceProviderMock.getContainerWithPipeline(100L)) .thenReturn(containerWithPipeline); + when(scmServiceProviderMock.getContainerWithPipeline(101L)) + .thenReturn(closedContainerWithPipeline); + when(scmServiceProviderMock.getContainerWithPipeline(102L)) + .thenReturn(quasiClosedContainerWithPipeline); when(scmServiceProviderMock .getExistContainerWithPipelinesInBatch(containerList)) .thenReturn(verifiedContainerPipeline); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconContainerManager.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconContainerManager.java index 1d871b9974b9..2f16c60c37ca 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconContainerManager.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconContainerManager.java @@ -20,6 +20,8 @@ import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.DELETED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.QUASI_CLOSED; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.OPEN; import static org.apache.hadoop.ozone.recon.OMMetadataManagerTestUtils.getRandomPipeline; import static org.assertj.core.api.Assertions.assertThat; @@ -183,6 +185,35 @@ public void testUpdateContainerStateFromOpen() throws Exception { getContainerManager().getContainer(containerID).getState()); } + @Test + public void testRecoverDeletedContainerToClosedFromDnReport() throws Exception { + ContainerWithPipeline deletedContainer = getTestContainer(101L, 
DELETED); + ContainerID containerID = deletedContainer.getContainerInfo().containerID(); + getContainerManager().addNewContainer(deletedContainer); + assertEquals(DELETED, getContainerManager().getContainer(containerID).getState()); + + DatanodeDetails datanodeDetails = randomDatanodeDetails(); + getContainerManager().checkAndAddNewContainer(containerID, State.CLOSED, + datanodeDetails); + + assertEquals(CLOSED, getContainerManager().getContainer(containerID).getState()); + } + + @Test + public void testRecoverDeletedContainerToQuasiClosedFromDnReport() throws Exception { + ContainerWithPipeline deletedContainer = getTestContainer(102L, DELETED); + ContainerID containerID = deletedContainer.getContainerInfo().containerID(); + getContainerManager().addNewContainer(deletedContainer); + assertEquals(DELETED, getContainerManager().getContainer(containerID).getState()); + + DatanodeDetails datanodeDetails = randomDatanodeDetails(); + getContainerManager().checkAndAddNewContainer(containerID, State.QUASI_CLOSED, + datanodeDetails); + + assertEquals(QUASI_CLOSED, + getContainerManager().getContainer(containerID).getState()); + } + ContainerInfo newContainerInfo(long containerId, Pipeline pipeline) { return new ContainerInfo.Builder() .setContainerID(containerId) diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconSCMContainerSyncIntegration.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconSCMContainerSyncIntegration.java new file mode 100644 index 000000000000..7bf32e520b12 --- /dev/null +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconSCMContainerSyncIntegration.java @@ -0,0 +1,1300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.scm; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleEvent.FINALIZE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.DELETED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.DELETING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.OPEN; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.QUASI_CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.ONE; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_THRESHOLD; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_THRESHOLD_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_DELETED_CONTAINER_CHECK_BATCH_SIZE; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD_DEFAULT; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static 
org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.LongStream; +import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; +import org.apache.hadoop.ozone.recon.ReconServerConfigKeys; +import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerSyncHelper.SyncAction; +import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +/** + * Integration tests for {@link ReconStorageContainerSyncHelper} and + * {@link ReconStorageContainerManagerFacade#syncWithSCMContainerInfo()}. + * + *

+ * <p>Uses a real {@link ReconContainerManager} backed by RocksDB
+ * (from {@link AbstractReconContainerManagerTest}) and a mocked
+ * {@link StorageContainerServiceProvider} that stands in for live SCM RPCs.
+ * This combination validates actual state machine transitions and database
+ * persistence without requiring a running cluster.
+ *
+ * <p>Test organisation:
+ * <ul>
+ *   <li>{@link DecideSyncActionTests} — all decision paths for
+ *       {@code decideSyncAction()}</li>
+ *   <li>{@link Pass1ClosedSyncTests} — Pass 1: add missing CLOSED containers
+ *       and correct stale OPEN/CLOSING state</li>
+ *   <li>{@link Pass2OpenAddOnlyTests} — Pass 2: add OPEN containers missing
+ *       from Recon</li>
+ *   <li>{@link Pass3QuasiClosedAddOnlyTests} — Pass 3: add QUASI_CLOSED
+ *       containers missing from Recon and correct stale OPEN/CLOSING state</li>
+ *   <li>{@link Pass4DeletedRetirementTests} — Pass 4: retire
+ *       CLOSED/QUASI_CLOSED containers that SCM has already deleted</li>
+ *   <li>{@link LargeScaleTests} — end-to-end scenarios with 100 k+
+ *       containers covering all state transition paths</li>
+ * </ul>
+ */ +@Timeout(120) +public class TestReconSCMContainerSyncIntegration + extends AbstractReconContainerManagerTest { + + private StorageContainerServiceProvider mockScm; + private ReconStorageContainerSyncHelper syncHelper; + + @BeforeEach + void setupSyncHelper() { + getConf().setInt(OZONE_RECON_SCM_CONTAINER_THRESHOLD, + OZONE_RECON_SCM_CONTAINER_THRESHOLD_DEFAULT); + getConf().setInt(OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD, + OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD_DEFAULT); + mockScm = mock(StorageContainerServiceProvider.class); + syncHelper = new ReconStorageContainerSyncHelper( + mockScm, getConf(), getContainerManager()); + } + + // --------------------------------------------------------------------------- + // Helpers + // --------------------------------------------------------------------------- + + /** + * Builds a {@link ContainerWithPipeline} with a null pipeline, which is + * valid for non-OPEN and (after our null-pipeline guard) OPEN containers. + */ + private ContainerWithPipeline containerCwp(long id, LifeCycleState state) { + ContainerInfo info = new ContainerInfo.Builder() + .setContainerID(id) + .setState(state) + .setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE)) + .setOwner("test") + .build(); + return new ContainerWithPipeline(info, null); + } + + /** + * Seeds the real {@link ReconContainerManager} with {@code count} containers + * in the given {@code state}, using IDs in the range + * [{@code startId}, {@code startId + count}). + * + *

For non-OPEN states the container state manager accepts direct insertion + * from the proto (bypassing the state machine), enabling fast bulk seeding. + * For OPEN containers we use the null-pipeline path of {@code addNewContainer}. + */ + private void seedRecon(long startId, int count, LifeCycleState state) + throws Exception { + ReconContainerManager cm = getContainerManager(); + for (long id = startId; id < startId + count; id++) { + cm.addNewContainer(containerCwp(id, state)); + } + } + + /** + * Seeds Recon with {@code count} OPEN containers and then transitions each + * one to CLOSING so that Pass 1 can exercise the CLOSING→CLOSED correction. + */ + private void seedReconAsClosing(long startId, int count) throws Exception { + seedRecon(startId, count, OPEN); + ReconContainerManager cm = getContainerManager(); + for (long id = startId; id < startId + count; id++) { + cm.updateContainerState(ContainerID.valueOf(id), FINALIZE); + } + } + + /** Returns a list of ContainerIDs for IDs in [{@code start}, {@code end}). 
*/ + private List idRange(long start, long end) { + return LongStream.range(start, end) + .mapToObj(ContainerID::valueOf) + .collect(Collectors.toList()); + } + + // =========================================================================== + // decideSyncAction() tests + // =========================================================================== + + @Nested + class DecideSyncActionTests { + + @Test + void noContainersAnywhereReturnsNoAction() throws Exception { + when(mockScm.getContainerCount()).thenReturn(0L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + assertEquals(SyncAction.NO_ACTION, syncHelper.decideSyncAction()); + } + + @Test + void countsMatchNoStateDriftReturnsNoAction() throws Exception { + // Seed Recon: 10 CLOSED, 5 OPEN + seedRecon(1, 10, CLOSED); + seedRecon(11, 5, OPEN); + + when(mockScm.getContainerCount()).thenReturn(15L); + when(mockScm.getContainerCount(OPEN)).thenReturn(5L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + assertEquals(SyncAction.NO_ACTION, syncHelper.decideSyncAction()); + } + + @Test + void smallTotalDriftReturnsTargetedSync() throws Exception { + // Recon has 5, SCM has 8 → drift = 3 + seedRecon(1, 5, CLOSED); + + when(mockScm.getContainerCount()).thenReturn(8L); + + assertEquals(SyncAction.TARGETED_SYNC, syncHelper.decideSyncAction()); + } + + @Test + void exactlyAtThresholdReturnsTargetedSync() throws Exception { + // drift == threshold → still TARGETED_SYNC (threshold is exclusive) + int threshold = getConf().getInt( + OZONE_RECON_SCM_CONTAINER_THRESHOLD, + OZONE_RECON_SCM_CONTAINER_THRESHOLD_DEFAULT); + + when(mockScm.getContainerCount()).thenReturn((long) threshold); + // Recon is empty → drift == threshold + + assertEquals(SyncAction.TARGETED_SYNC, syncHelper.decideSyncAction()); + } + + @Test + void oneAboveThresholdReturnsFullSnapshot() throws Exception { + int threshold = getConf().getInt( + 
OZONE_RECON_SCM_CONTAINER_THRESHOLD, + OZONE_RECON_SCM_CONTAINER_THRESHOLD_DEFAULT); + + when(mockScm.getContainerCount()).thenReturn((long) threshold + 1L); + // Recon is empty → drift == threshold + 1 + + assertEquals(SyncAction.FULL_SNAPSHOT, syncHelper.decideSyncAction()); + } + + @Test + void largeTotalDriftReturnsFullSnapshot() throws Exception { + // Recon empty, SCM has 200,000 containers → well above default 10k threshold + when(mockScm.getContainerCount()).thenReturn(200_000L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + + assertEquals(SyncAction.FULL_SNAPSHOT, syncHelper.decideSyncAction()); + } + + @Test + void customThresholdIsRespected() throws Exception { + // Override threshold to 50 + getConf().setInt(OZONE_RECON_SCM_CONTAINER_THRESHOLD, 50); + ReconStorageContainerSyncHelper customHelper = new ReconStorageContainerSyncHelper( + mockScm, getConf(), getContainerManager()); + + // Drift = 51 → FULL_SNAPSHOT with custom threshold 50 + when(mockScm.getContainerCount()).thenReturn(51L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + assertEquals(SyncAction.FULL_SNAPSHOT, customHelper.decideSyncAction()); + + // Drift = 50 → TARGETED_SYNC (50 is at threshold, not above) + seedRecon(1, 1, CLOSED); // Recon now has 1, SCM 51 → drift = 50 + assertEquals(SyncAction.TARGETED_SYNC, customHelper.decideSyncAction()); + } + + @Test + void largeOpenOnlyDriftReturnsTargetedSync() throws Exception { + // SCM is ahead only on OPEN containers. This should remain on the + // incremental path rather than forcing a full snapshot. + when(mockScm.getContainerCount()).thenReturn(20_000L); + when(mockScm.getContainerCount(OPEN)).thenReturn(20_000L); + + assertEquals(SyncAction.TARGETED_SYNC, syncHelper.decideSyncAction()); + } + + @Test + void largeNonOpenDriftStillReturnsFullSnapshot() throws Exception { + // Most of SCM's drift is in stable states, so a full snapshot is still + // the correct escalation path. 
+ when(mockScm.getContainerCount()).thenReturn(20_000L); + when(mockScm.getContainerCount(OPEN)).thenReturn(5_000L); + + assertEquals(SyncAction.FULL_SNAPSHOT, syncHelper.decideSyncAction()); + } + + @Test + void openDriftExceedsThresholdReturnsTargetedSync() throws Exception { + // Total drift = 0, but OPEN drift = 6 > default threshold (5) + // Recon: 20 OPEN + 30 CLOSED = 50 total + seedRecon(1, 20, OPEN); + seedRecon(21, 30, CLOSED); + + when(mockScm.getContainerCount()).thenReturn(50L); + when(mockScm.getContainerCount(OPEN)).thenReturn(14L); // drift = 6 + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + assertEquals(SyncAction.TARGETED_SYNC, syncHelper.decideSyncAction()); + } + + @Test + void quasiClosedDriftExceedsThresholdReturnsTargetedSync() throws Exception { + // Total drift = 0, OPEN drift = 0, but QUASI_CLOSED drift = 6 > threshold. + // This is the case that was missed when only OPEN was checked. + // Recon: 10 QUASI_CLOSED + 40 CLOSED = 50 total + seedRecon(1, 10, QUASI_CLOSED); + seedRecon(11, 40, CLOSED); + + when(mockScm.getContainerCount()).thenReturn(50L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(4L); // drift = 6 + + assertEquals(SyncAction.TARGETED_SYNC, syncHelper.decideSyncAction()); + } + + @Test + void perStateDriftBelowThresholdReturnsNoAction() throws Exception { + // Both OPEN and QUASI_CLOSED drift <= threshold → NO_ACTION + // Recon: 20 OPEN + 30 CLOSED = 50 total + seedRecon(1, 20, OPEN); + seedRecon(21, 30, CLOSED); + + when(mockScm.getContainerCount()).thenReturn(50L); + when(mockScm.getContainerCount(OPEN)).thenReturn(18L); // drift = 2 <= 5 + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); // drift = 0 + + assertEquals(SyncAction.NO_ACTION, syncHelper.decideSyncAction()); + } + + @Test + void customPerStateDriftThresholdIsRespected() throws Exception { + // Override per-state threshold to 2; OPEN drift of 3 should now 
trigger + getConf().setInt(OZONE_RECON_SCM_PER_STATE_DRIFT_THRESHOLD, 2); + ReconStorageContainerSyncHelper customHelper = new ReconStorageContainerSyncHelper( + mockScm, getConf(), getContainerManager()); + + seedRecon(1, 10, OPEN); + seedRecon(11, 10, CLOSED); + + when(mockScm.getContainerCount()).thenReturn(20L); // no total drift + when(mockScm.getContainerCount(OPEN)).thenReturn(7L); // drift = 3 > 2 + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + assertEquals(SyncAction.TARGETED_SYNC, customHelper.decideSyncAction()); + } + + @Test + void bothPerStateDriftsPresentFirstExceedingStateTriggersSync() throws Exception { + // Both OPEN and QUASI_CLOSED are drifted; sync is triggered at first hit + seedRecon(1, 20, OPEN); + seedRecon(21, 20, QUASI_CLOSED); + seedRecon(41, 10, CLOSED); + + when(mockScm.getContainerCount()).thenReturn(50L); // total matches + when(mockScm.getContainerCount(OPEN)).thenReturn(12L); // drift = 8 + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(10L); // drift = 10 + + assertEquals(SyncAction.TARGETED_SYNC, syncHelper.decideSyncAction()); + } + } + + // =========================================================================== + // Pass 1: CLOSED sync — add missing containers, correct stale OPEN/CLOSING + // =========================================================================== + + @Nested + class Pass1ClosedSyncTests { + + @BeforeEach + void zeroOtherPasses() throws IOException { + // Keep Pass 2, 3, 4 quiet so only Pass 1 exercises state changes + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + } + + @Test + void addsClosedContainerMissingFromRecon() throws Exception { + ContainerID cid = ContainerID.valueOf(1L); + ContainerWithPipeline cwp = containerCwp(1L, CLOSED); + + when(mockScm.getContainerCount(CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(CLOSED))) + 
.thenReturn(Collections.singletonList(cid)); + when(mockScm.getExistContainerWithPipelinesInBatch(Collections.singletonList(1L))) + .thenReturn(Collections.singletonList(cwp)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void correctsOpenContainerToClosedInRecon() throws Exception { + // Recon: container 1 is OPEN. SCM: container 1 is CLOSED. + seedRecon(1, 1, OPEN); + ContainerID cid = ContainerID.valueOf(1L); + + when(mockScm.getContainerCount(CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(CLOSED))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void correctsClosingContainerToClosedInRecon() throws Exception { + // Recon: container 1 is CLOSING. SCM: container 1 is CLOSED. + seedReconAsClosing(1, 1); + ContainerID cid = ContainerID.valueOf(1L); + assertEquals(CLOSING, getContainerManager().getContainer(cid).getState()); + + when(mockScm.getContainerCount(CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(CLOSED))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void skipsContainerAlreadyClosed() throws Exception { + // Recon: container 1 is already CLOSED. Pass 1 should be a no-op. 
+ seedRecon(1, 1, CLOSED); + ContainerID cid = ContainerID.valueOf(1L); + + when(mockScm.getContainerCount(CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(CLOSED))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + // State must remain CLOSED, not re-transitioned + assertEquals(CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void pass1CorrectQuasiClosedToClosedViaForceClose() throws Exception { + // Pass 1 corrects QUASI_CLOSED → CLOSED using FORCE_CLOSE when SCM shows the + // container is definitively CLOSED. This handles the case where Recon missed + // the final quorum decision made by SCM. + seedRecon(1, 1, QUASI_CLOSED); + ContainerID cid = ContainerID.valueOf(1L); + + when(mockScm.getContainerCount(CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(CLOSED))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + // Container is now CLOSED in Recon (corrected by Pass 1 via FORCE_CLOSE) + assertEquals(CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void emptyListFromSCMBeforeTotalExhaustedReturnsFalse() throws Exception { + // SCM says there are 2 containers but returns empty list — indicates a + // transient SCM error; sync should return false (partial). + when(mockScm.getContainerCount(CLOSED)).thenReturn(2L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(2), eq(CLOSED))) + .thenReturn(Collections.emptyList()); + + boolean result = syncHelper.syncWithSCMContainerInfo(); + // Pass 1 failed (empty list before total exhausted), but passes 2-4 still run. + // Overall result is false because at least one pass failed. 
+ assertTrue(!result || getContainerManager().getContainers().isEmpty()); + } + + @Test + void multiplePagesAllBatchesProcessed() throws Exception { + // Force batch size to 3 so 7 containers span 3 pages + getConf().setLong( + ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_ID_BATCH_SIZE, 3L); + ReconStorageContainerSyncHelper pagedHelper = new ReconStorageContainerSyncHelper( + mockScm, getConf(), getContainerManager()); + + when(mockScm.getContainerCount(CLOSED)).thenReturn(7L); + // Page 1: IDs 1-3 + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(3), eq(CLOSED))) + .thenReturn(idRange(1, 4)); + // Page 2: IDs 4-6 + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(4L)), eq(3), eq(CLOSED))) + .thenReturn(idRange(4, 7)); + // Page 3: ID 7 + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(7L)), eq(3), eq(CLOSED))) + .thenReturn(idRange(7, 8)); + + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())).thenAnswer(inv -> { + List idList = inv.getArgument(0); + return idList.stream().map(id -> containerCwp(id, CLOSED)).collect(Collectors.toList()); + }); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + assertTrue(pagedHelper.syncWithSCMContainerInfo()); + assertEquals(7, getContainerManager().getContainers(CLOSED).size()); + } + + @Test + void mixedExistingAndMissingOnlyMissingAreAdded() throws Exception { + // Recon already has containers 1,3,5; SCM reports 1-5 CLOSED + seedRecon(1, 1, CLOSED); + seedRecon(3, 1, CLOSED); + seedRecon(5, 1, CLOSED); + + List scmClosed = idRange(1, 6); // 1,2,3,4,5 + when(mockScm.getContainerCount(CLOSED)).thenReturn(5L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(5), eq(CLOSED))) + .thenReturn(scmClosed); + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())).thenAnswer(inv -> { + List idList = inv.getArgument(0); + return idList.stream().map(id -> containerCwp(id, 
CLOSED)).collect(Collectors.toList()); + }); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + // All 5 should now be in Recon (3 pre-existing + 2 added) + assertEquals(5, getContainerManager().getContainers(CLOSED).size()); + } + } + + // =========================================================================== + // Pass 2: OPEN add-only + // =========================================================================== + + @Nested + class Pass2OpenAddOnlyTests { + + @BeforeEach + void zeroOtherPasses() throws IOException { + when(mockScm.getContainerCount(CLOSED)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + } + + @Test + void addsOpenContainerMissingFromRecon() throws Exception { + ContainerID cid = ContainerID.valueOf(10L); + ContainerWithPipeline cwp = containerCwp(10L, OPEN); + + when(mockScm.getContainerCount(OPEN)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(OPEN))) + .thenReturn(Collections.singletonList(cid)); + when(mockScm.getExistContainerWithPipelinesInBatch(Collections.singletonList(10L))) + .thenReturn(Collections.singletonList(cwp)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(OPEN, getContainerManager().getContainer(cid).getState()); + } + + @Test + void doesNotDuplicateExistingOpenContainer() throws Exception { + seedRecon(10, 1, OPEN); + ContainerID cid = ContainerID.valueOf(10L); + + when(mockScm.getContainerCount(OPEN)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(OPEN))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(1, getContainerManager().getContainers(OPEN).size()); + } + + @Test + void doesNotOverwriteContainerAlreadyAdvancedBeyondOpen() throws Exception { + // Container 10 is already CLOSED in Recon but still appears in SCM's OPEN + // list (stale SCM data). Pass 2 must NOT revert it to OPEN. 
+ seedRecon(10, 1, CLOSED); + ContainerID cid = ContainerID.valueOf(10L); + + when(mockScm.getContainerCount(OPEN)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(OPEN))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + // State should remain CLOSED — Pass 2 is add-only and skips present containers + assertEquals(CLOSED, getContainerManager().getContainer(cid).getState()); + assertEquals(0, getContainerManager().getContainers(OPEN).size()); + } + + @Test + void openContainersWithNullPipelineAddedSuccessfully() throws Exception { + // Verifies null-pipeline guard: OPEN container returned with null pipeline + // (e.g., pipeline already cleaned up on SCM) must still be added. + ContainerID cid = ContainerID.valueOf(20L); + when(mockScm.getContainerCount(OPEN)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(OPEN))) + .thenReturn(Collections.singletonList(cid)); + // null pipeline — simulates cleaned-up pipeline; batch API returns it with null pipeline + when(mockScm.getExistContainerWithPipelinesInBatch(Collections.singletonList(20L))) + .thenReturn(Collections.singletonList(containerCwp(20L, OPEN))); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(OPEN, getContainerManager().getContainer(cid).getState()); + } + + @Test + void openSyncUsesCursorAndOnlyFetchesNewOpenContainers() throws Exception { + getConf().setLong( + ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_ID_BATCH_SIZE, 2L); + ReconStorageContainerSyncHelper pagedHelper = new ReconStorageContainerSyncHelper( + mockScm, getConf(), getContainerManager()); + + when(mockScm.getContainerCount(OPEN)).thenReturn(2L, 1L, 0L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(2), eq(OPEN))) + .thenReturn(idRange(10, 12)); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(12L)), eq(2), eq(OPEN))) + 
.thenReturn(Collections.emptyList()); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(12L)), eq(1), eq(OPEN))) + .thenReturn(Collections.singletonList(ContainerID.valueOf(20L))); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(21L)), eq(2), eq(OPEN))) + .thenReturn(Collections.emptyList()); + when(mockScm.getExistContainerWithPipelinesInBatch(Arrays.asList(10L, 11L))) + .thenReturn(Arrays.asList(containerCwp(10L, OPEN), containerCwp(11L, OPEN))); + when(mockScm.getExistContainerWithPipelinesInBatch(Collections.singletonList(20L))) + .thenReturn(Collections.singletonList(containerCwp(20L, OPEN))); + + assertTrue(pagedHelper.syncWithSCMContainerInfo()); + assertEquals(2, getContainerManager().getContainers(OPEN).size()); + + assertTrue(pagedHelper.syncWithSCMContainerInfo()); + assertEquals(3, getContainerManager().getContainers(OPEN).size()); + + verify(mockScm, times(1)).getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(2), eq(OPEN)); + } + } + + // =========================================================================== + // Pass 3: QUASI_CLOSED add + correct + // =========================================================================== + + @Nested + class Pass3QuasiClosedAddOnlyTests { + + @BeforeEach + void zeroOtherPasses() throws IOException { + when(mockScm.getContainerCount(CLOSED)).thenReturn(0L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + } + + @Test + void addsQuasiClosedContainerMissingFromRecon() throws Exception { + ContainerID cid = ContainerID.valueOf(30L); + ContainerWithPipeline cwp = containerCwp(30L, QUASI_CLOSED); + + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(QUASI_CLOSED))) + .thenReturn(Collections.singletonList(cid)); + when(mockScm.getExistContainerWithPipelinesInBatch(Collections.singletonList(30L))) + .thenReturn(Collections.singletonList(cwp)); + + 
assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(QUASI_CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void quasiClosedWithNullPipelineAddedSuccessfully() throws Exception { + // QUASI_CLOSED containers whose pipelines have been cleaned up on SCM + // must still be added with null pipeline (no NullPointerException). + ContainerID cid = ContainerID.valueOf(31L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(QUASI_CLOSED))) + .thenReturn(Collections.singletonList(cid)); + when(mockScm.getExistContainerWithPipelinesInBatch(Collections.singletonList(31L))) + .thenReturn(Collections.singletonList(containerCwp(31L, QUASI_CLOSED))); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(QUASI_CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void doesNotDuplicateExistingQuasiClosedContainer() throws Exception { + seedRecon(30, 1, QUASI_CLOSED); + ContainerID cid = ContainerID.valueOf(30L); + + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(QUASI_CLOSED))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(1, getContainerManager().getContainers(QUASI_CLOSED).size()); + } + + @Test + void doesNotOverwriteContainerAlreadyClosedInRecon() throws Exception { + // Container already CLOSED in Recon but still in SCM's QUASI_CLOSED list. + // Pass 3 must not revert the container to QUASI_CLOSED (no downgrade). 
+ seedRecon(30, 1, CLOSED); + ContainerID cid = ContainerID.valueOf(30L); + + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(QUASI_CLOSED))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void pass3CorrectOpenToQuasiClosed() throws Exception { + // Container is OPEN in Recon but SCM has already moved it to QUASI_CLOSED. + // Pass 3 must advance it: OPEN → CLOSING (FINALIZE) → QUASI_CLOSED (QUASI_CLOSE). + seedRecon(35, 1, OPEN); + ContainerID cid = ContainerID.valueOf(35L); + + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(QUASI_CLOSED))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(QUASI_CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void pass3CorrectClosingToQuasiClosed() throws Exception { + // Container is stuck CLOSING in Recon but SCM already moved it to QUASI_CLOSED. + // Pass 3 must advance it: CLOSING → QUASI_CLOSED (QUASI_CLOSE). 
+ seedRecon(36, 1, CLOSING); + ContainerID cid = ContainerID.valueOf(36L); + + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(1L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(1), eq(QUASI_CLOSED))) + .thenReturn(Collections.singletonList(cid)); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(QUASI_CLOSED, getContainerManager().getContainer(cid).getState()); + } + } + + // =========================================================================== + // Pass 4: DELETED retirement (uses getExistContainerWithPipelinesInBatch) + // =========================================================================== + + @Nested + class Pass4DeletedRetirementTests { + + @BeforeEach + void zeroAdditivePasses() throws IOException { + when(mockScm.getContainerCount(CLOSED)).thenReturn(0L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + } + + @Test + void retiresClosedContainerWhenSCMReportsDeleted() throws Exception { + seedRecon(100, 1, CLOSED); + ContainerID cid = ContainerID.valueOf(100L); + + // Batch RPC returns the container as DELETED + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenReturn(Collections.singletonList(containerCwp(100L, DELETED))); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(DELETED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void retiresClosedContainerToDeletingWhenSCMReportsDeleting() throws Exception { + seedRecon(101, 1, CLOSED); + ContainerID cid = ContainerID.valueOf(101L); + + // Batch RPC returns the container as DELETING + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenReturn(Collections.singletonList(containerCwp(101L, DELETING))); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + // Only DELETING transition applied (not CLEANUP), so state is DELETING in Recon + assertEquals(DELETING, 
getContainerManager().getContainer(cid).getState()); + } + + @Test + void retiresQuasiClosedContainerWhenSCMReportsDeleted() throws Exception { + seedRecon(102, 1, QUASI_CLOSED); + ContainerID cid = ContainerID.valueOf(102L); + + // Batch RPC returns the container as DELETED + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenReturn(Collections.singletonList(containerCwp(102L, DELETED))); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(DELETED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void emptyBatchResultSkipsRetirementAsAmbiguous() throws Exception { + // A completely empty batch result is ambiguous: it could mean the + // queried containers were purged, but it could also mean the batch RPC + // failed or returned no data. Recon should skip retirement in that + // case rather than deleting live containers. + seedRecon(103, 1, CLOSED); + ContainerID cid = ContainerID.valueOf(103L); + + // Batch returns empty list → skip retirement for safety + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenReturn(Collections.emptyList()); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(CLOSED, getContainerManager().getContainer(cid).getState()); + } + + @Test + void openContainersAreNotCandidatesForRetirement() throws Exception { + // Pass 4 only checks CLOSED and QUASI_CLOSED; OPEN containers are skipped. + // No batch RPC mock needed: Pass 4 sees no candidates and returns early. + seedRecon(200, 5, OPEN); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + // All OPEN containers remain OPEN; no retirements occurred + assertEquals(5, getContainerManager().getContainers(OPEN).size()); + assertEquals(0, getContainerManager().getContainers(DELETED).size()); + } + + @Test + void liveContainersAreNotRetired() throws Exception { + // CLOSED in Recon, also CLOSED in SCM (not deleted) → must stay CLOSED. 
+ seedRecon(300, 3, CLOSED); + + // Batch RPC returns all three containers as CLOSED (still live in SCM) + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenAnswer(inv -> { + List ids = inv.getArgument(0); + return ids.stream() + .map(id -> containerCwp(id, CLOSED)) + .collect(Collectors.toList()); + }); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(3, getContainerManager().getContainers(CLOSED).size()); + assertEquals(0, getContainerManager().getContainers(DELETED).size()); + } + + @Test + void batchSizeLimitsCheckPerCycle() throws Exception { + // Seed 10 CLOSED containers; set batch size = 3. + // Only a rotating window of 3 should be evaluated per sync cycle. + seedRecon(400, 10, CLOSED); + getConf().setInt(OZONE_RECON_SCM_DELETED_CONTAINER_CHECK_BATCH_SIZE, 3); + ReconStorageContainerSyncHelper batchHelper = new ReconStorageContainerSyncHelper( + mockScm, getConf(), getContainerManager()); + + // All containers in the batch window are DELETED in SCM + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenAnswer(inv -> { + List ids = inv.getArgument(0); + return ids.stream() + .map(id -> containerCwp(id, DELETED)) + .collect(Collectors.toList()); + }); + + assertTrue(batchHelper.syncWithSCMContainerInfo()); + // Exactly 3 containers should be retired per cycle (rotating batch window) + long retiredCount = getContainerManager().getContainers().stream() + .filter(c -> c.getState() == DELETED).count(); + assertTrue(retiredCount <= 3, + "Expected at most 3 retirements per cycle, got " + retiredCount); + } + + @Test + void batchRPCPartialResultRetiresPresentAndAbsent() throws Exception { + // 500: not in batch result (absent from SCM → purged) → should be retired to DELETED + // 501: in batch result with CLOSED state (still live in SCM) → should stay CLOSED + // 502: in batch result with DELETED state → should be retired to DELETED + seedRecon(500, 3, CLOSED); + + 
when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenReturn(Arrays.asList( + containerCwp(501L, CLOSED), + containerCwp(502L, DELETED))); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + // 500: absent from batch → treated as purged → DELETED + assertEquals(DELETED, getContainerManager().getContainer( + ContainerID.valueOf(500L)).getState()); + // 501: SCM still has it as CLOSED → no retirement + assertEquals(CLOSED, getContainerManager().getContainer( + ContainerID.valueOf(501L)).getState()); + // 502: SCM says DELETED → DELETED + assertEquals(DELETED, getContainerManager().getContainer( + ContainerID.valueOf(502L)).getState()); + } + } + + // =========================================================================== + // Large-scale tests (100 k+ containers) + // =========================================================================== + + @Nested + class LargeScaleTests { + + private static final int LARGE_COUNT = 100_000; + + @BeforeEach + void configLargeBatchSize() { + // Allow single-batch fetches for all large-scale tests + getConf().setLong( + ReconServerConfigKeys.OZONE_RECON_SCM_CONTAINER_ID_BATCH_SIZE, + (long) LARGE_COUNT); + getConf().setInt(OZONE_RECON_SCM_DELETED_CONTAINER_CHECK_BATCH_SIZE, + LARGE_COUNT); + // Default Pass 4 mock: all queried containers are still CLOSED (not deleted). + // Individual tests that need retirement override this mock inline. + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenAnswer(inv -> { + List ids = inv.getArgument(0); + return ids.stream() + .map(id -> containerCwp(id, CLOSED)) + .collect(Collectors.toList()); + }); + } + + @Test + void pass1100kClosedContainersMissingFromRecon() throws Exception { + // Recon: empty. SCM: 100k CLOSED containers. + // After sync: Recon should have all 100k as CLOSED. 
+ List ids = idRange(1, LARGE_COUNT + 1); + + when(mockScm.getContainerCount(CLOSED)).thenReturn((long) LARGE_COUNT); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(LARGE_COUNT), eq(CLOSED))) + .thenReturn(ids); + // Pass 1 add-missing path now uses getExistContainerWithPipelinesInBatch. + // The @BeforeEach default mock already returns CLOSED for any asked IDs. + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(LARGE_COUNT, getContainerManager().getContainers(CLOSED).size()); + assertEquals(0, getContainerManager().getContainers(OPEN).size()); + } + + @Test + void pass1100kOpenContainersStuckInReconAllCorrectedToClosed() throws Exception { + // Recon: 100k OPEN containers. SCM: all 100k are CLOSED. + // After sync: all 100k should be CLOSED in Recon. + seedRecon(1, LARGE_COUNT, OPEN); + assertEquals(LARGE_COUNT, getContainerManager().getContainers(OPEN).size()); + + List ids = idRange(1, LARGE_COUNT + 1); + when(mockScm.getContainerCount(CLOSED)).thenReturn((long) LARGE_COUNT); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(LARGE_COUNT), eq(CLOSED))) + .thenReturn(ids); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(LARGE_COUNT, getContainerManager().getContainers(CLOSED).size()); + assertEquals(0, getContainerManager().getContainers(OPEN).size()); + } + + @Test + void pass1100kClosingContainersStuckInReconAllCorrectedToClosed() throws Exception { + // Recon: 100k CLOSING containers. SCM: all 100k are CLOSED. 
+ seedReconAsClosing(1, LARGE_COUNT); + assertEquals(LARGE_COUNT, getContainerManager().getContainers(CLOSING).size()); + + List ids = idRange(1, LARGE_COUNT + 1); + when(mockScm.getContainerCount(CLOSED)).thenReturn((long) LARGE_COUNT); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(LARGE_COUNT), eq(CLOSED))) + .thenReturn(ids); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(LARGE_COUNT, getContainerManager().getContainers(CLOSED).size()); + assertEquals(0, getContainerManager().getContainers(CLOSING).size()); + } + + @Test + void pass4100kClosedContainersAllDeletedInSCM() throws Exception { + // Recon: 100k CLOSED. SCM: all 100k are DELETED. + // After sync: all 100k should be DELETED in Recon. + seedRecon(1, LARGE_COUNT, CLOSED); + + when(mockScm.getContainerCount(CLOSED)).thenReturn(0L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + // Override default mock: all queried containers are DELETED in SCM + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenAnswer(inv -> { + List ids = inv.getArgument(0); + return ids.stream() + .map(id -> containerCwp(id, DELETED)) + .collect(Collectors.toList()); + }); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(LARGE_COUNT, getContainerManager().getContainers(DELETED).size()); + assertEquals(0, getContainerManager().getContainers(CLOSED).size()); + } + + @Test + void pass4100kQuasiClosedContainersAllDeletedInSCM() throws Exception { + // Recon: 100k QUASI_CLOSED. SCM: all 100k are DELETED. 
+ seedRecon(1, LARGE_COUNT, QUASI_CLOSED); + + when(mockScm.getContainerCount(CLOSED)).thenReturn(0L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + // Override default mock: all queried containers are DELETED in SCM + when(mockScm.getExistContainerWithPipelinesInBatch(anyList())) + .thenAnswer(inv -> { + List ids = inv.getArgument(0); + return ids.stream() + .map(id -> containerCwp(id, DELETED)) + .collect(Collectors.toList()); + }); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + assertEquals(LARGE_COUNT, getContainerManager().getContainers(DELETED).size()); + assertEquals(0, getContainerManager().getContainers(QUASI_CLOSED).size()); + } + + /** + * Full 100 k mixed scenario covering all four sync passes simultaneously. + * + *
+     * <pre>
+     * Container ID ranges and their scenario:
+     *   1      – 20,000 : OPEN in Recon, CLOSED in SCM
+     *                      → Pass 1 corrects to CLOSED
+     *   20,001 – 50,000 : absent from Recon, CLOSED in SCM
+     *                      → Pass 1 adds as CLOSED
+     *   50,001 – 70,000 : absent from Recon, OPEN in SCM
+     *                      → Pass 2 adds as OPEN
+     *   70,001 – 80,000 : absent from Recon, QUASI_CLOSED in SCM
+     *                      → Pass 3 adds as QUASI_CLOSED
+     *   80,001 – 99,999 : CLOSED in Recon, DELETED in SCM
+     *                      → Pass 4 retires to DELETED
+     * </pre>
+     *
+     * <p>After a single {@code syncWithSCMContainerInfo()} call:
+     * <ul>
+     *   <li>50,000 CLOSED (20k corrected + 30k added)</li>
+     *   <li>20,000 OPEN (newly added)</li>
+     *   <li>10,000 QUASI_CLOSED (newly added)</li>
+     *   <li>19,999 DELETED (retired — Pass 4 uses batch of 100k
+     *       covering all CLOSED/QUASI_CLOSED candidates at time of run)</li>
+     * </ul>
+ */ + @Test + void fullSync100kMixedStateTransitionScenario() throws Exception { + // ---- Pre-seed Recon ---- + // Range 1-20k: stuck OPEN (SCM has them as CLOSED) + seedRecon(1, 20_000, OPEN); + // Range 80001-100000: CLOSED in Recon (will be deleted) + seedRecon(80_001, 19_999, CLOSED); + + // ---- Mock SCM ---- + // Pass 1 — CLOSED list: IDs 1-50000 + List closedIds = idRange(1, 50_001); + when(mockScm.getContainerCount(CLOSED)).thenReturn(50_000L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(50_000), eq(CLOSED))) + .thenReturn(closedIds); + + // Pass 2 — OPEN list: IDs 50001-70000 + List openIds = idRange(50_001, 70_001); + when(mockScm.getContainerCount(OPEN)).thenReturn(20_000L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(20_000), eq(OPEN))) + .thenReturn(openIds); + + // Pass 3 — QUASI_CLOSED list: IDs 70001-80000 + List qcIds = idRange(70_001, 80_001); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(10_000L); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(10_000), eq(QUASI_CLOSED))) + .thenReturn(qcIds); + + // Unified batch mock: handles both Pass 1/2/3 add paths and Pass 4 retirement. + // Pass 1 adds [20001-50000] as CLOSED, Pass 2 adds [50001-70000] as OPEN, + // Pass 3 adds [70001-80000] as QUASI_CLOSED; Pass 4 retires [80001-100000] as DELETED. 
+ when(mockScm.getExistContainerWithPipelinesInBatch(anyList())).thenAnswer(inv -> { + List ids = inv.getArgument(0); + return ids.stream().map(id -> { + LifeCycleState state; + if (id > 80_000) { + state = DELETED; // Pass 4: retire these containers + } else if (id > 70_000) { + state = QUASI_CLOSED; // Pass 3 add + Pass 4: alive as QUASI_CLOSED + } else if (id > 50_000) { + state = OPEN; // Pass 2 add (Pass 4 doesn't query OPEN containers) + } else { + state = CLOSED; // Pass 1 correct+add + Pass 4: alive as CLOSED + } + return containerCwp(id, state); + }).collect(Collectors.toList()); + }); + + // ---- Run sync ---- + assertTrue(syncHelper.syncWithSCMContainerInfo()); + + // ---- Verify final state ---- + List allContainers = getContainerManager().getContainers(); + long closedCount = allContainers.stream().filter(c -> c.getState() == CLOSED).count(); + long openCount = allContainers.stream().filter(c -> c.getState() == OPEN).count(); + long qcCount = allContainers.stream().filter(c -> c.getState() == QUASI_CLOSED).count(); + long deletedCount = allContainers.stream().filter(c -> c.getState() == DELETED).count(); + + // 20k corrected from OPEN + 30k added = 50k CLOSED + assertEquals(50_000, closedCount, + "Expected 50,000 CLOSED containers"); + // 20k newly added from SCM's OPEN list + assertEquals(20_000, openCount, + "Expected 20,000 OPEN containers"); + // 10k newly added from SCM's QUASI_CLOSED list + assertEquals(10_000, qcCount, + "Expected 10,000 QUASI_CLOSED containers"); + // 19,999 retired from Recon's CLOSED set to DELETED + assertEquals(19_999, deletedCount, + "Expected 19,999 DELETED containers"); + + // Total: 50k+20k+10k+19999 = 99,999 + assertEquals(99_999, allContainers.size()); + } + + @Test + void syncIsIdempotentRunningTwiceProducesSameResult() throws Exception { + // Seed: 5k OPEN (stuck), 5k CLOSED (missing) + seedRecon(1, 5_000, OPEN); + + List closedIds = idRange(1, 10_001); + when(mockScm.getContainerCount(CLOSED)).thenReturn(10_000L); 
+ when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(10_000), eq(CLOSED))) + .thenReturn(closedIds); + // Default @BeforeEach mock for getExistContainerWithPipelinesInBatch already returns + // CLOSED for any IDs — covers both the Pass 1 add path and Pass 4 retirement check. + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + // First sync + assertTrue(syncHelper.syncWithSCMContainerInfo()); + long closedAfterFirst = getContainerManager().getContainers(CLOSED).size(); + + // Second sync — SCM still reports same data; result must be identical + assertTrue(syncHelper.syncWithSCMContainerInfo()); + long closedAfterSecond = getContainerManager().getContainers(CLOSED).size(); + + assertEquals(closedAfterFirst, closedAfterSecond, + "Second sync must not change the container count"); + assertEquals(10_000, closedAfterSecond); + } + + @Test + void decideSyncAction100kDriftTriggerFullSnapshot() throws Exception { + // SCM has 100k containers, Recon is empty → drift 100k > threshold 10k + when(mockScm.getContainerCount()).thenReturn(100_000L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + + assertEquals(SyncAction.FULL_SNAPSHOT, syncHelper.decideSyncAction()); + } + + @Test + void decideSyncAction50kReconMissingTriggersFullSnapshot() throws Exception { + // Recon has 50k CLOSED, SCM has 100k → drift 50k > threshold 10k + seedRecon(1, 50_000, CLOSED); + + when(mockScm.getContainerCount()).thenReturn(100_000L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + + assertEquals(SyncAction.FULL_SNAPSHOT, syncHelper.decideSyncAction()); + } + + @Test + void decideSyncAction5kDriftTriggersTargetedSync() throws Exception { + // Recon has 95k, SCM has 100k → drift 5k < threshold 10k → TARGETED_SYNC + seedRecon(1, 95_000, CLOSED); + + when(mockScm.getContainerCount()).thenReturn(100_000L); + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); + + 
assertEquals(SyncAction.TARGETED_SYNC, syncHelper.decideSyncAction()); + } + + @Test + void decideSyncAction100kOpenToClosedDriftTriggersFullSnapshot() throws Exception { + // Total counts match, but SCM has advanced every OPEN container to a + // stable non-OPEN state. That creates a large non-OPEN drift and should + // escalate to FULL_SNAPSHOT under the new policy. + seedRecon(1, 100_000, OPEN); // all OPEN in Recon + + when(mockScm.getContainerCount()).thenReturn(100_000L); // total matches + when(mockScm.getContainerCount(OPEN)).thenReturn(0L); // SCM has 0 OPEN + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn(0L); + + assertEquals(SyncAction.FULL_SNAPSHOT, syncHelper.decideSyncAction()); + } + + @Test + void allStateTransitionPathsEndToEnd() throws Exception { + // Exhaustive state-transition coverage in a single test: + // OPEN → CLOSED (Pass 1 correction) + // CLOSING → CLOSED (Pass 1 correction) + // absent → CLOSED (Pass 1 add) + // absent → OPEN (Pass 2 add) + // absent → QUASI_CLOSED (Pass 3 add) + // CLOSED → DELETING (Pass 4: SCM DELETING) + // CLOSED → DELETED (Pass 4: SCM DELETED) + // QUASI_CLOSED → DELETED (Pass 4: SCM DELETED) + // CLOSED → DELETED (Pass 4: ContainerNotFoundException) + + int perGroup = 10_000; // 10k containers per scenario = 90k total + + // Pre-seed Recon + long base = 1L; + seedRecon(base, perGroup, OPEN); // group A: stuck OPEN + seedReconAsClosing(base + perGroup, perGroup); // group B: stuck CLOSING + // group C (base+2*perGroup): absent, SCM has them CLOSED + // group D (base+3*perGroup): absent, SCM has them OPEN + // group E (base+4*perGroup): absent, SCM has them QUASI_CLOSED + seedRecon(base + 5L * perGroup, perGroup, CLOSED); // group F: to retire → DELETING + seedRecon(base + 6L * perGroup, perGroup, CLOSED); // group G: to retire → DELETED + seedRecon(base + 7L * perGroup, perGroup, QUASI_CLOSED); // group H: to retire → DELETED + seedRecon(base + 8L * perGroup, perGroup, CLOSED); // group I: SCM 
ContainerNotFound + + // Ranges + long bEnd = base + 2L * perGroup; + long cEnd = base + 3L * perGroup; + long dEnd = base + 4L * perGroup; + long eEnd = base + 5L * perGroup; + long fEnd = base + 6L * perGroup; + long hEnd = base + 8L * perGroup; + + // Build CLOSED list for Pass 1: groups A + B + C + List closedIds = idRange(base, cEnd); + when(mockScm.getContainerCount(CLOSED)).thenReturn((long) closedIds.size()); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(closedIds.size()), eq(CLOSED))) + .thenReturn(closedIds); + + // Build OPEN list for Pass 2: group D + List openIds = idRange(bEnd, dEnd); + when(mockScm.getContainerCount(OPEN)).thenReturn((long) openIds.size()); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(openIds.size()), eq(OPEN))) + .thenReturn(openIds); + + // Build QUASI_CLOSED list for Pass 3: group E + List qcIds = idRange(dEnd, eEnd); + when(mockScm.getContainerCount(QUASI_CLOSED)).thenReturn((long) qcIds.size()); + when(mockScm.getListOfContainerIDs( + eq(ContainerID.valueOf(1L)), eq(qcIds.size()), eq(QUASI_CLOSED))) + .thenReturn(qcIds); + + // Unified batch mock: handles both Pass 1/2/3 add paths and Pass 4 retirement. + // Pass 1 adds group C (absent→CLOSED); Pass 2 adds group D (absent→OPEN); + // Pass 3 adds group E (absent→QUASI_CLOSED); Pass 4 retires groups F/G/H/I. 
+ when(mockScm.getExistContainerWithPipelinesInBatch(anyList())).thenAnswer(inv -> { + List ids = inv.getArgument(0); + List result = new ArrayList<>(); + for (Long id : ids) { + if (id >= base && id < cEnd) { + result.add(containerCwp(id, CLOSED)); // Groups A,B,C: CLOSED in SCM + } else if (id >= cEnd && id < dEnd) { + result.add(containerCwp(id, OPEN)); // Group D: OPEN in SCM (Pass 2 add) + } else if (id >= dEnd && id < eEnd) { + result.add(containerCwp(id, QUASI_CLOSED)); // Group E: QUASI_CLOSED (Pass 3 + alive) + } else if (id >= eEnd && id < fEnd) { + result.add(containerCwp(id, DELETING)); // Group F: DELETING in SCM + } else if (id >= fEnd && id < hEnd) { + result.add(containerCwp(id, DELETED)); // Groups G+H: DELETED in SCM + } + // Group I (>= hEnd): excluded from result → scmState=null → retired to DELETED + } + return result; + }); + + assertTrue(syncHelper.syncWithSCMContainerInfo()); + + List all = getContainerManager().getContainers(); + + long closedCount = all.stream().filter(c -> c.getState() == CLOSED).count(); + long openCount = all.stream().filter(c -> c.getState() == OPEN).count(); + long qcCount = all.stream().filter(c -> c.getState() == QUASI_CLOSED).count(); + long deletingCount = all.stream().filter(c -> c.getState() == DELETING).count(); + long deletedCount = all.stream().filter(c -> c.getState() == DELETED).count(); + + // Groups A+B corrected + Group C added = 3 * perGroup CLOSED + assertEquals(3L * perGroup, closedCount, + "Groups A (OPEN→CLOSED), B (CLOSING→CLOSED), C (added) = 3 * perGroup CLOSED"); + // Group D added as OPEN + assertEquals((long) perGroup, openCount, + "Group D: added as OPEN"); + // Group E added as QUASI_CLOSED + assertEquals((long) perGroup, qcCount, + "Group E: added as QUASI_CLOSED"); + // Group F: CLOSED → DELETING + assertEquals((long) perGroup, deletingCount, + "Group F: CLOSED → DELETING"); + // Groups G + H + I: CLOSED/QUASI_CLOSED → DELETED + assertEquals(3L * perGroup, deletedCount, + "Groups G, H, I: 
→ DELETED"); + } + } +} diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconStorageContainerSyncHelper.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconStorageContainerSyncHelper.java index 9ba0d85a931b..24dd50d76ac2 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconStorageContainerSyncHelper.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconStorageContainerSyncHelper.java @@ -23,13 +23,11 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; import java.util.Arrays; @@ -76,12 +74,16 @@ void testContainerMissingFromReconIsAdded() throws Exception { eq(ContainerID.valueOf(1L)), eq(1), eq(CLOSED))) .thenReturn(Collections.singletonList(cid)); when(mockContainerManager.containerExist(cid)).thenReturn(false); - when(mockScmServiceProvider.getContainerWithPipeline(42L)).thenReturn(cwp); + // Pass 1 now uses getExistContainerWithPipelinesInBatch for missing containers so that + // the null-pipeline fallback prevents silent skipping when pipeline lookups fail. 
+ when(mockScmServiceProvider.getExistContainerWithPipelinesInBatch( + Collections.singletonList(42L))).thenReturn(Collections.singletonList(cwp)); boolean result = syncHelper.syncWithSCMContainerInfo(); assertTrue(result); - verify(mockScmServiceProvider).getContainerWithPipeline(42L); + verify(mockScmServiceProvider).getExistContainerWithPipelinesInBatch( + Collections.singletonList(42L)); verify(mockContainerManager).addNewContainer(cwp); } @@ -118,11 +120,27 @@ void testContainerMissingFromReconIsAddedWhenMultiplePages() throws Exception { eq(ContainerID.valueOf(3L)), eq(2), eq(CLOSED))) .thenReturn(Collections.singletonList(cid3)); + // Stub getContainer for cid3 (exists in Recon) so processSyncedClosedContainer + // reads its state and confirms no correction is needed. + ContainerInfo closedInfo3 = new ContainerInfo.Builder() + .setContainerID(3L) + .setState(CLOSED) + .setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE)) + .setOwner("test") + .build(); + when(mockContainerManager.containerExist(cid1)).thenReturn(false); when(mockContainerManager.containerExist(cid2)).thenReturn(false); when(mockContainerManager.containerExist(cid3)).thenReturn(true); - when(mockScmServiceProvider.getContainerWithPipeline(1L)).thenReturn(cwp1); - when(mockScmServiceProvider.getContainerWithPipeline(2L)).thenReturn(cwp2); + when(mockContainerManager.getContainer(cid3)).thenReturn(closedInfo3); + // Pass 1 fetches each missing CLOSED container individually. + when(mockScmServiceProvider.getExistContainerWithPipelinesInBatch( + Collections.singletonList(1L))) + .thenReturn(Collections.singletonList(cwp1)); + when(mockScmServiceProvider.getExistContainerWithPipelinesInBatch( + Collections.singletonList(2L))) + .thenReturn(Collections.singletonList(cwp2)); + // Page 2: cid3 already exists in Recon; no batch call needed for that page. 
boolean result = pagedHelper.syncWithSCMContainerInfo(); @@ -143,17 +161,27 @@ void testContainerMissingFromReconIsAddedWhenMultiplePages() throws Exception { @Test void testContainerAlreadyInReconIsSkipped() throws Exception { ContainerID cid = ContainerID.valueOf(7L); + // Stub getContainer to return a CLOSED container so processSyncedClosedContainer + // finds no state drift and returns without further action. + ContainerInfo closedInfo = new ContainerInfo.Builder() + .setContainerID(7L) + .setState(CLOSED) + .setReplicationConfig(StandaloneReplicationConfig.getInstance(ONE)) + .setOwner("test") + .build(); when(mockScmServiceProvider.getContainerCount(CLOSED)).thenReturn(1L); when(mockScmServiceProvider.getListOfContainerIDs( eq(ContainerID.valueOf(1L)), eq(1), eq(CLOSED))) .thenReturn(Collections.singletonList(cid)); when(mockContainerManager.containerExist(cid)).thenReturn(true); + when(mockContainerManager.getContainer(cid)).thenReturn(closedInfo); boolean result = syncHelper.syncWithSCMContainerInfo(); assertTrue(result); - verify(mockScmServiceProvider, never()).getContainerWithPipeline(anyLong()); + // Container already in Recon: no batch fetch needed, no add attempted. + verify(mockScmServiceProvider, never()).getExistContainerWithPipelinesInBatch(any()); verify(mockContainerManager, never()).addNewContainer(any()); } @@ -164,7 +192,10 @@ void testZeroClosedContainersReturnsTrue() throws Exception { boolean result = syncHelper.syncWithSCMContainerInfo(); assertTrue(result); - verifyNoInteractions(mockContainerManager); + // Pass 4 calls getContainers() (returns empty list, no action taken) so we assert + // on the meaningful mutations: no containers added, no state transitions applied. 
+ verify(mockContainerManager, never()).addNewContainer(any()); + verify(mockContainerManager, never()).updateContainerState(any(), any()); verify(mockScmServiceProvider, never()) .getListOfContainerIDs(any(), any(Integer.class), any()); } @@ -179,7 +210,11 @@ void testEmptyListFromSCMReturnsFalse() throws Exception { boolean result = syncHelper.syncWithSCMContainerInfo(); assertFalse(result); - verifyNoInteractions(mockContainerManager); + // Empty batch → Pass 1 returns false immediately without adding any containers. + // Pass 4 may call getContainers() (returning empty list, which is harmless), so + // we assert on addNewContainer specifically rather than verifyNoInteractions. + verify(mockContainerManager, never()).addNewContainer(any()); + verify(mockContainerManager, never()).updateContainerState(any(), any()); } }