From 4beed12360d95c0ae8aa75620b9e34ae14cdeb55 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Thu, 2 Apr 2026 11:41:21 -0700 Subject: [PATCH 01/16] HDDS-13133. Track Ratis state machine events in OM and SCM metrics Change-Id: I4febee20c124e4c738b8f646f1e347844d2b7346 --- .../placement/metrics/SCMMetrics.java | 22 ++++++++ .../hadoop/hdds/scm/ha/SCMStateMachine.java | 28 ++++++++++ .../hdds/scm/ha/TestSCMStateMachine.java | 56 +++++++++++++++++++ .../org/apache/hadoop/ozone/om/OMMetrics.java | 22 ++++++++ .../om/ratis/OzoneManagerStateMachine.java | 28 ++++++++++ .../ratis/TestOzoneManagerStateMachine.java | 36 ++++++++++++ 6 files changed, 192 insertions(+) create mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java index feb4129952d9..26d4200ed6ae 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java @@ -17,6 +17,8 @@ package org.apache.hadoop.hdds.scm.container.placement.metrics; +import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.hdds.utils.DBCheckpointMetrics; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; @@ -24,6 +26,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; +import org.apache.hadoop.util.Time; /** * This class is for maintaining StorageContainerManager statistics. 
@@ -33,6 +36,9 @@ public class SCMMetrics { public static final String SOURCE_NAME = SCMMetrics.class.getSimpleName(); + private final List ratisEvents = new ArrayList<>(); + private static final int MAX_RATIS_EVENTS = 100; + /** * Container stat metrics, the meaning of following metrics * can be found in {@link ContainerStat}. @@ -155,6 +161,22 @@ public void decrContainerStat(ContainerStat deltaStat) { this.containerReportWriteCount.incr(-1 * deltaStat.getWriteCount().get()); } + public void addRatisEvent(String event) { + synchronized (ratisEvents) { + if (ratisEvents.size() >= MAX_RATIS_EVENTS) { + ratisEvents.remove(0); + } + ratisEvents.add(Time.formatTime(Time.now()) + ": " + event); + } + } + + @Metric("Ratis state machine events") + public String getRatisEvents() { + synchronized (ratisEvents) { + return String.join("\n", ratisEvents); + } + } + public void unRegister() { MetricsSystem ms = DefaultMetricsSystem.instance(); ms.unregisterSource(SOURCE_NAME); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java index 9d49ca36b6f2..8c39cbdc2324 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType; import org.apache.hadoop.hdds.scm.block.DeletedBlockLog; import org.apache.hadoop.hdds.scm.block.DeletedBlockLogImpl; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMMetrics; import org.apache.hadoop.hdds.scm.exceptions.SCMException; import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; @@ -213,6 +214,10 @@ public void notifyNotLeader(Collection pendingEntries) { return; } LOG.info("current leader SCM 
steps down."); + SCMMetrics metrics = StorageContainerManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("notifyNotLeader"); + } scm.getScmContext().updateLeaderAndTerm(false, 0); scm.getSCMServiceManager().notifyStatusChanged(); @@ -243,6 +248,12 @@ public CompletableFuture notifyInstallSnapshotFromLeader( final String leaderNodeId = leaderDetails.get().getNodeId(); LOG.info("Received install snapshot notification from SCM leader: {} with " + "term index: {}", leaderAddress, firstTermIndexInLog); + SCMMetrics metrics = StorageContainerManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent( + "notifyInstallSnapshotFromLeader: leaderNodeId=" + leaderNodeId + + ", firstTermIndexInLog=" + firstTermIndexInLog); + } CompletableFuture future = CompletableFuture.supplyAsync( () -> { @@ -283,6 +294,10 @@ public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, if (!isInitialized) { return; } + SCMMetrics metrics = StorageContainerManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("notifyLeaderChanged: newLeaderId=" + newLeaderId); + } currentLeaderTerm.set(scm.getScmHAManager().getRatisServer().getDivision() .getInfo().getCurrentTerm()); @@ -394,11 +409,20 @@ public void notifyLeaderReady() { scm.getScmContext().setLeaderReady(); scm.getSCMServiceManager().notifyStatusChanged(); scm.getFinalizationManager().onLeaderReady(); + SCMMetrics metrics = StorageContainerManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("notifyLeaderReady"); + } } @Override public void notifyConfigurationChanged(long term, long index, RaftProtos.RaftConfigurationProto newRaftConfiguration) { + SCMMetrics metrics = StorageContainerManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent( + "notifyConfigurationChanged: term=" + term + ", index=" + index); + } } @Override @@ -431,6 +455,10 @@ public void reinitialize() throws IOException { } LOG.info("{}: SCMStateMachine is reinitializing. 
newTermIndex = {}", getId(), termIndex); + SCMMetrics metrics = StorageContainerManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("reinitialize: " + termIndex); + } // re-initialize the DBTransactionBuffer and update the lastAppliedIndex. try { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java new file mode 100644 index 000000000000..f0be3200547c --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdds.scm.ha; + +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMMetrics; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; +import org.apache.hadoop.hdds.utils.TransactionInfo; +import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.server.protocol.TermIndex; +import org.junit.jupiter.api.Test; +import org.mockito.MockedStatic; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.when; + +/** + * Test SCMStateMachine events recording. + */ +public class TestSCMStateMachine { + + @Test + public void testRatisEventsRecording() throws Exception { + StorageContainerManager scm = mock(StorageContainerManager.class); + SCMMetrics metrics = SCMMetrics.create(); + SCMHADBTransactionBuffer buffer = mock(SCMHADBTransactionBuffer.class); + when(buffer.getLatestTrxInfo()).thenReturn(TransactionInfo.valueOf(TermIndex.valueOf(0, 0))); + + SCMStateMachine stateMachine = new SCMStateMachine(scm, buffer); + + try (MockedStatic scmStaticMock = mockStatic(StorageContainerManager.class)) { + scmStaticMock.when(StorageContainerManager::getMetrics).thenReturn(metrics); + + stateMachine.notifyConfigurationChanged(1, 1, RaftProtos.RaftConfigurationProto.getDefaultInstance()); + assertTrue(metrics.getRatisEvents().contains("notifyConfigurationChanged")); + } + + metrics.unRegister(); + } +} diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java index 9d2cab2fc507..3d00f192461f 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java @@ -18,6 +18,8 @@ package org.apache.hadoop.ozone.om; import 
com.google.common.annotations.VisibleForTesting; +import java.util.ArrayList; +import java.util.List; import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.DBCheckpointMetrics; @@ -28,6 +30,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.ozone.om.snapshot.OMSnapshotDirectoryMetrics; +import org.apache.hadoop.util.Time; /** * This class is for maintaining Ozone Manager statistics. @@ -38,6 +41,9 @@ public class OMMetrics implements OmMetadataReaderMetrics { private static final String SOURCE_NAME = OMMetrics.class.getSimpleName(); + private final List ratisEvents = new ArrayList<>(); + private static final int MAX_RATIS_EVENTS = 100; + // OM request type op metrics private @Metric MutableCounterLong numVolumeOps; private @Metric MutableCounterLong numBucketOps; @@ -1570,6 +1576,22 @@ public void incNumRecoverLeaseFails() { numRecoverLeaseFails.incr(); } + public void addRatisEvent(String event) { + synchronized (ratisEvents) { + if (ratisEvents.size() >= MAX_RATIS_EVENTS) { + ratisEvents.remove(0); + } + ratisEvents.add(Time.formatTime(Time.now()) + ": " + event); + } + } + + @Metric("Ratis state machine events") + public String getRatisEvents() { + synchronized (ratisEvents) { + return String.join("\n", ratisEvents); + } + } + public void unRegister() { if (dbCheckpointMetrics != null) { dbCheckpointMetrics.unRegister(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java index 09a530ab6cc7..65bbee76cdd3 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java +++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java @@ -41,6 +41,7 @@ import org.apache.hadoop.ozone.audit.AuditLoggerType; import org.apache.hadoop.ozone.audit.OMSystemAction; import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.OzoneManagerPrepareState; import org.apache.hadoop.ozone.om.exceptions.OMException; @@ -170,6 +171,10 @@ public synchronized void reinitialize() throws IOException { final TermIndex lastApplied = getLastAppliedTermIndex(); unpause(lastApplied.getIndex(), lastApplied.getTerm()); LOG.info("{}: reinitialize {} with {}", getId(), getGroupId(), lastApplied); + OMMetrics metrics = ozoneManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("reinitialize: " + lastApplied); + } } } @@ -183,6 +188,10 @@ public SnapshotInfo getLatestSnapshot() { @Override public void notifyLeaderReady() { ozoneManager.getOmSnapshotManager().resetInFlightSnapshotCount(); + OMMetrics metrics = ozoneManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("notifyLeaderReady"); + } } @Override @@ -208,6 +217,10 @@ public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, AUDIT.logWriteSuccess(ozoneManager.buildAuditMessageForSuccess(OMSystemAction.LEADER_CHANGE, auditParams)); LOG.info("{}: leader changed to {}", groupMemberId, newLeaderId); + OMMetrics metrics = ozoneManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("notifyLeaderChanged: newLeaderId=" + newLeaderId); + } } /** Notified by Ratis for non-StateMachine term-index update. 
*/ @@ -287,6 +300,11 @@ public void notifyConfigurationChanged(long term, long index, } // Check and update the peer list in OzoneManager ozoneManager.updatePeerList(newPeerIds); + OMMetrics metrics = ozoneManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("notifyConfigurationChanged: " + + "term=" + term + ", index=" + index + ", newPeers=" + newPeerIds); + } } /** @@ -301,6 +319,11 @@ public void notifySnapshotInstalled(RaftProtos.InstallSnapshotResult result, long snapshotIndex, RaftPeer peer) { LOG.info("Receive notifySnapshotInstalled event {} for the peer: {}" + " snapshotIndex: {}.", result, peer.getId(), snapshotIndex); + OMMetrics metrics = ozoneManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("notifySnapshotInstalled: " + + "result=" + result + ", snapshotIndex=" + snapshotIndex + ", peer=" + peer.getId()); + } switch (result) { case SUCCESS: case SNAPSHOT_UNAVAILABLE: @@ -581,6 +604,11 @@ public CompletableFuture notifyInstallSnapshotFromLeader( .getLeaderInfo().getId().getId()).toString(); LOG.info("Received install snapshot notification from OM leader: {} with " + "term index: {}", leaderNodeId, firstTermIndexInLog); + OMMetrics metrics = ozoneManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("notifyInstallSnapshotFromLeader: " + + "leaderNodeId=" + leaderNodeId + ", firstTermIndexInLog=" + firstTermIndexInLog); + } return CompletableFuture.supplyAsync( () -> { diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java index 36d7a80aeea9..2734a3f214b4 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java @@ -45,6 +45,8 @@ import 
java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.NettyMetrics; +import org.apache.hadoop.ozone.audit.AuditMessage; import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.Table; @@ -54,6 +56,7 @@ import org.apache.hadoop.ozone.audit.OMSystemAction; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OMMetadataManager; +import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.hadoop.ozone.om.OmConfig; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshotManager; @@ -1037,4 +1040,37 @@ private RaftClientRequest buildClientRequest( .setType(RaftClientRequest.writeRequestType()) .build(); } + + @Test + public void testRatisEventsRecording() { + OzoneManager om = mock(OzoneManager.class); + OMMetrics metrics = OMMetrics.create(); + when(om.getMetrics()).thenReturn(metrics); + when(om.getOmSnapshotManager()).thenReturn(mock(OmSnapshotManager.class)); + when(om.getConfiguration()).thenReturn(new OzoneConfiguration()); + AuditMessage auditMessage = mock(AuditMessage.class); + when(auditMessage.getOp()).thenReturn("LEADER_CHANGE"); + when(om.buildAuditMessageForSuccess(any(), any())).thenReturn(auditMessage); + + OzoneManagerStateMachine sm = new OzoneManagerStateMachine( + om, + mock(OzoneManagerDoubleBuffer.class), + mock(RequestHandler.class), + mock(ExecutorService.class), + mock(NettyMetrics.class) + ); + + sm.notifyLeaderReady(); + assertTrue(metrics.getRatisEvents().contains("notifyLeaderReady")); + + RaftGroupMemberId groupMemberId = mock(RaftGroupMemberId.class); + when(groupMemberId.getPeerId()).thenReturn(RaftPeerId.valueOf("peer1")); + sm.notifyLeaderChanged(groupMemberId, RaftPeerId.valueOf("peer1")); + assertTrue(metrics.getRatisEvents().contains("notifyLeaderChanged: 
newLeaderId=peer1")); + + sm.notifyConfigurationChanged(1, 1, RaftProtos.RaftConfigurationProto.getDefaultInstance()); + assertTrue(metrics.getRatisEvents().contains("notifyConfigurationChanged")); + + metrics.unRegister(); + } } From bb7017c128e76a26426b2ea202af4c352593058f Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Thu, 2 Apr 2026 11:56:42 -0700 Subject: [PATCH 02/16] HDDS-13133. Implement frontend for Ratis event timeline in OM and SCM web UI Change-Id: If8d7504ade66706ba1feb5fe05cd2def14c1ec5c --- .../webapps/static/templates/menu.html | 1 + .../resources/webapps/scm/ratis-events.html | 30 +++++++++++++++++++ .../src/main/resources/webapps/scm/scm.js | 19 ++++++++++++ .../webapps/ozoneManager/ozoneManager.js | 14 +++++++++ .../webapps/ozoneManager/ratis-events.html | 30 +++++++++++++++++++ 5 files changed, 94 insertions(+) create mode 100644 hadoop-hdds/server-scm/src/main/resources/webapps/scm/ratis-events.html create mode 100644 hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ratis-events.html diff --git a/hadoop-hdds/framework/src/main/resources/webapps/static/templates/menu.html b/hadoop-hdds/framework/src/main/resources/webapps/static/templates/menu.html index 1963a6543835..35a3410ef032 100644 --- a/hadoop-hdds/framework/src/main/resources/webapps/static/templates/menu.html +++ b/hadoop-hdds/framework/src/main/resources/webapps/static/templates/menu.html @@ -34,6 +34,7 @@
  • Configuration
  • +
  • Ratis event timeline
  • Documentation
  • +

    Ratis event timeline

    + + + + + + + + + + + + +
    Event
    {{event}}
    diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js index eca79852e43b..e403367e4ad8 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js @@ -19,6 +19,25 @@ "use strict"; angular.module('scm', ['ozone', 'nvd3']); + angular.module('scm').config(function ($routeProvider) { + $routeProvider + .when("/ratis_events", { + template: "" + }); + }); + + angular.module('scm').component('ratisEvents', { + templateUrl: 'ratis-events.html', + controller: function ($http) { + var ctrl = this; + $http.get("jmx?qry=Hadoop:service=StorageContainerManager,name=SCMMetrics") + .then(function (result) { + var metrics = result.data.beans[0]; + ctrl.events = metrics.RatisEvents ? metrics.RatisEvents.split('\n') : []; + }); + } + }); + angular.module('scm').component('scmOverview', { templateUrl: 'scm-overview.html', require: { diff --git a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js index e98d3f7ba3a7..f5f12f8b4cec 100644 --- a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js +++ b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js @@ -30,6 +30,9 @@ }) .when("/snapshots", { template: "" + }) + .when("/ratis_events", { + template: "" }); }); angular.module('ozoneManager').component('omSnapshots', { @@ -131,6 +134,17 @@ } } }); + angular.module('ozoneManager').component('ratisEvents', { + templateUrl: 'ratis-events.html', + controller: function ($http) { + var ctrl = this; + $http.get("jmx?qry=Hadoop:service=OzoneManager,name=OMMetrics") + .then(function (result) { + var metrics = result.data.beans[0]; + ctrl.events = metrics.RatisEvents ? 
metrics.RatisEvents.split('\n') : []; + }); + } + }); angular.module('ozoneManager').component('omMetrics', { templateUrl: 'om-metrics.html', controller: function ($http) { diff --git a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ratis-events.html b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ratis-events.html new file mode 100644 index 000000000000..caf9fcf8aa62 --- /dev/null +++ b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ratis-events.html @@ -0,0 +1,30 @@ + +

    Ratis event timeline

    + + + + + + + + + + + + +
    Event
    {{event}}
    From f34f9c161ea90da2a0cd83cadec1084a4ee0c50e Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Thu, 2 Apr 2026 12:53:25 -0700 Subject: [PATCH 03/16] HDDS-13133. Add timestamps to Ratis events and update UI to two columns Change-Id: I7164f2d08a0c1ed6a8bf00ada697672f180bd642 --- .../hdds/scm/container/placement/metrics/SCMMetrics.java | 2 +- .../src/main/resources/webapps/scm/ratis-events.html | 6 ++++-- .../server-scm/src/main/resources/webapps/scm/scm.js | 9 ++++++++- .../main/java/org/apache/hadoop/ozone/om/OMMetrics.java | 2 +- .../main/resources/webapps/ozoneManager/ozoneManager.js | 9 ++++++++- .../resources/webapps/ozoneManager/ratis-events.html | 6 ++++-- 6 files changed, 26 insertions(+), 8 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java index 26d4200ed6ae..a0c167ff2e86 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java @@ -166,7 +166,7 @@ public void addRatisEvent(String event) { if (ratisEvents.size() >= MAX_RATIS_EVENTS) { ratisEvents.remove(0); } - ratisEvents.add(Time.formatTime(Time.now()) + ": " + event); + ratisEvents.add(Time.formatTime(Time.now()) + "|" + event); } } diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/ratis-events.html b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/ratis-events.html index caf9fcf8aa62..9eb6008d00ff 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/ratis-events.html +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/ratis-events.html @@ -19,12 +19,14 @@

    Ratis event timeline

    - + + - + +
    EventTimestampEvent description
    {{event}}{{event.timestamp}}{{event.description}}
    diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js index e403367e4ad8..f74b24e8b62d 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js @@ -33,7 +33,14 @@ $http.get("jmx?qry=Hadoop:service=StorageContainerManager,name=SCMMetrics") .then(function (result) { var metrics = result.data.beans[0]; - ctrl.events = metrics.RatisEvents ? metrics.RatisEvents.split('\n') : []; + var rawEvents = metrics.RatisEvents ? metrics.RatisEvents.split('\n') : []; + ctrl.events = rawEvents.map(function(e) { + var parts = e.split('|'); + return { + timestamp: parts[0], + description: parts[1] + }; + }); }); } }); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java index 3d00f192461f..cb3745beb636 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java @@ -1581,7 +1581,7 @@ public void addRatisEvent(String event) { if (ratisEvents.size() >= MAX_RATIS_EVENTS) { ratisEvents.remove(0); } - ratisEvents.add(Time.formatTime(Time.now()) + ": " + event); + ratisEvents.add(Time.formatTime(Time.now()) + "|" + event); } } diff --git a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js index f5f12f8b4cec..155fd0f9ffaa 100644 --- a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js +++ b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js @@ -141,7 +141,14 @@ $http.get("jmx?qry=Hadoop:service=OzoneManager,name=OMMetrics") .then(function (result) { var metrics = result.data.beans[0]; - ctrl.events 
= metrics.RatisEvents ? metrics.RatisEvents.split('\n') : []; + var rawEvents = metrics.RatisEvents ? metrics.RatisEvents.split('\n') : []; + ctrl.events = rawEvents.map(function(e) { + var parts = e.split('|'); + return { + timestamp: parts[0], + description: parts[1] + }; + }); }); } }); diff --git a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ratis-events.html b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ratis-events.html index caf9fcf8aa62..9eb6008d00ff 100644 --- a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ratis-events.html +++ b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ratis-events.html @@ -19,12 +19,14 @@

    Ratis event timeline

    - + + - + +
    EventTimestampEvent description
    {{event}}{{event.timestamp}}{{event.description}}
    From c5c1df2b9ad7203b80fb4a29078c921f30e394e7 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Thu, 2 Apr 2026 13:51:29 -0700 Subject: [PATCH 04/16] Update SCM ratis event text. Change-Id: Ia568c9ce99631d2f780a4d833d357748e51fa34b --- .../hadoop/hdds/scm/ha/SCMStateMachine.java | 35 ++++++++++++---- .../om/ratis/OzoneManagerStateMachine.java | 41 +++++++++++++------ .../webapps/ozoneManager/ozoneManager.js | 2 +- 3 files changed, 57 insertions(+), 21 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java index 8c39cbdc2324..81fcb70078b0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.util.ArrayList; import java.util.Collection; import java.util.EnumMap; import java.util.List; @@ -216,7 +217,7 @@ public void notifyNotLeader(Collection pendingEntries) { LOG.info("current leader SCM steps down."); SCMMetrics metrics = StorageContainerManager.getMetrics(); if (metrics != null) { - metrics.addRatisEvent("notifyNotLeader"); + metrics.addRatisEvent("current leader SCM steps down."); } scm.getScmContext().updateLeaderAndTerm(false, 0); @@ -251,8 +252,8 @@ public CompletableFuture notifyInstallSnapshotFromLeader( SCMMetrics metrics = StorageContainerManager.getMetrics(); if (metrics != null) { metrics.addRatisEvent( - "notifyInstallSnapshotFromLeader: leaderNodeId=" + leaderNodeId + - ", firstTermIndexInLog=" + firstTermIndexInLog); + "Installing snapshot from SCM leader " + leaderNodeId + + ", term index: " + firstTermIndexInLog); } CompletableFuture future = CompletableFuture.supplyAsync( @@ -295,9 +296,6 @@ public void 
notifyLeaderChanged(RaftGroupMemberId groupMemberId, return; } SCMMetrics metrics = StorageContainerManager.getMetrics(); - if (metrics != null) { - metrics.addRatisEvent("notifyLeaderChanged: newLeaderId=" + newLeaderId); - } currentLeaderTerm.set(scm.getScmHAManager().getRatisServer().getDivision() .getInfo().getCurrentTerm()); @@ -312,10 +310,16 @@ public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, if (!groupMemberId.getPeerId().equals(newLeaderId)) { LOG.info("leader changed, yet current SCM is still follower."); + if (metrics != null) { + metrics.addRatisEvent("Leader changed to " + newLeaderId + ", yet current SCM is still follower."); + } return; } LOG.info("current SCM becomes leader of term {}.", currentLeaderTerm); + if (metrics != null) { + metrics.addRatisEvent("current SCM becomes leader of term " + currentLeaderTerm); + } scm.getScmContext().updateLeaderAndTerm(true, currentLeaderTerm.get()); @@ -411,7 +415,7 @@ public void notifyLeaderReady() { scm.getFinalizationManager().onLeaderReady(); SCMMetrics metrics = StorageContainerManager.getMetrics(); if (metrics != null) { - metrics.addRatisEvent("notifyLeaderReady"); + metrics.addRatisEvent("Ready to serve requests as the leader"); } } @@ -420,8 +424,23 @@ public void notifyConfigurationChanged(long term, long index, RaftProtos.RaftConfigurationProto newRaftConfiguration) { SCMMetrics metrics = StorageContainerManager.getMetrics(); if (metrics != null) { + List newPeers = + newRaftConfiguration.getPeersList(); + List newListeners = + newRaftConfiguration.getListenersList(); + List newPeerIds = new ArrayList<>(); + List newListenersIds = new ArrayList<>(); + for (RaftProtos.RaftPeerProto raftPeerProto : newPeers) { + newPeerIds.add(RaftPeerId.valueOf(raftPeerProto.getId()).toString()); + } + for (RaftProtos.RaftPeerProto raftListenerProto : newListeners) { + newListenersIds.add(RaftPeerId.valueOf(raftListenerProto.getId()).toString()); + } metrics.addRatisEvent( - 
"notifyConfigurationChanged: term=" + term + ", index=" + index); + "New peers " + newPeerIds + + (newListenersIds.isEmpty()? "" : ", new listeners " + newListenersIds) + + " added at term index (" + + term + ", " + index + ")"); } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java index 65bbee76cdd3..ccb469b2d9c9 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java @@ -26,6 +26,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -190,7 +191,16 @@ public void notifyLeaderReady() { ozoneManager.getOmSnapshotManager().resetInFlightSnapshotCount(); OMMetrics metrics = ozoneManager.getMetrics(); if (metrics != null) { - metrics.addRatisEvent("notifyLeaderReady"); + metrics.addRatisEvent("Ready to serve requests as the leader"); + } + } + + @Override + public void notifyNotLeader(Collection pendingEntries) { + LOG.info("current leader OM steps down."); + OMMetrics metrics = ozoneManager.getMetrics(); + if (metrics != null) { + metrics.addRatisEvent("current leader OM steps down."); } } @@ -219,7 +229,7 @@ public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, LOG.info("{}: leader changed to {}", groupMemberId, newLeaderId); OMMetrics metrics = ozoneManager.getMetrics(); if (metrics != null) { - metrics.addRatisEvent("notifyLeaderChanged: newLeaderId=" + newLeaderId); + metrics.addRatisEvent("Leader changed to " + newLeaderId); } } @@ -295,16 +305,23 @@ public void notifyConfigurationChanged(long term, long index, for 
(RaftProtos.RaftPeerProto raftPeerProto : newPeers) { newPeerIds.add(RaftPeerId.valueOf(raftPeerProto.getId()).toString()); } - for (RaftProtos.RaftPeerProto raftPeerProto : newListeners) { - newPeerIds.add(RaftPeerId.valueOf(raftPeerProto.getId()).toString()); + List newListenersIds = new ArrayList<>(); + for (RaftProtos.RaftPeerProto raftListenerProto : newListeners) { + newListenersIds.add(RaftPeerId.valueOf(raftListenerProto.getId()).toString()); } - // Check and update the peer list in OzoneManager - ozoneManager.updatePeerList(newPeerIds); + OMMetrics metrics = ozoneManager.getMetrics(); if (metrics != null) { - metrics.addRatisEvent("notifyConfigurationChanged: " + - "term=" + term + ", index=" + index + ", newPeers=" + newPeerIds); + metrics.addRatisEvent( + "New peers " + newPeerIds + + (newListenersIds.isEmpty()? "" : ", new listeners " + newListenersIds) + + " added at term index (" + + term + ", " + index + ")"); } + + // Check and update the peer list in OzoneManager + newPeerIds.addAll(newListenersIds); + ozoneManager.updatePeerList(newPeerIds); } /** @@ -321,8 +338,8 @@ public void notifySnapshotInstalled(RaftProtos.InstallSnapshotResult result, " snapshotIndex: {}.", result, peer.getId(), snapshotIndex); OMMetrics metrics = ozoneManager.getMetrics(); if (metrics != null) { - metrics.addRatisEvent("notifySnapshotInstalled: " + - "result=" + result + ", snapshotIndex=" + snapshotIndex + ", peer=" + peer.getId()); + metrics.addRatisEvent("Install snapshot " + + result + ", snapshotIndex=" + snapshotIndex + ", peer=" + peer.getId()); } switch (result) { case SUCCESS: @@ -606,8 +623,8 @@ public CompletableFuture notifyInstallSnapshotFromLeader( "term index: {}", leaderNodeId, firstTermIndexInLog); OMMetrics metrics = ozoneManager.getMetrics(); if (metrics != null) { - metrics.addRatisEvent("notifyInstallSnapshotFromLeader: " + - "leaderNodeId=" + leaderNodeId + ", firstTermIndexInLog=" + firstTermIndexInLog); + metrics.addRatisEvent("Installing snapshot 
from " + + "OM leader " + leaderNodeId + ", term index: " + firstTermIndexInLog); } return CompletableFuture.supplyAsync( diff --git a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js index 155fd0f9ffaa..8b64a082362f 100644 --- a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js +++ b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js @@ -141,7 +141,7 @@ $http.get("jmx?qry=Hadoop:service=OzoneManager,name=OMMetrics") .then(function (result) { var metrics = result.data.beans[0]; - var rawEvents = metrics.RatisEvents ? metrics.RatisEvents.split('\n') : []; + var rawEvents = metrics['tag.RatisEvents'] ? metrics['tag.RatisEvents'].split('\n') : []; ctrl.events = rawEvents.map(function(e) { var parts = e.split('|'); return { From b91c045eb4546bce10ebe14af77306d0a23e3e88 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Thu, 2 Apr 2026 13:52:30 -0700 Subject: [PATCH 05/16] SCM js Change-Id: Ib27d985d730869e99e38c27394fd858581d5b756 --- hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js index f74b24e8b62d..1e44826f980e 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js @@ -33,7 +33,7 @@ $http.get("jmx?qry=Hadoop:service=StorageContainerManager,name=SCMMetrics") .then(function (result) { var metrics = result.data.beans[0]; - var rawEvents = metrics.RatisEvents ? metrics.RatisEvents.split('\n') : []; + var rawEvents = metrics['tag.RatisEvents'] ? 
metrics['tag.RatisEvents'].split('\n') : []; ctrl.events = rawEvents.map(function(e) { var parts = e.split('|'); return { From bb110618f4cf12c14e295b2962c31c313f27c248 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Thu, 2 Apr 2026 15:09:06 -0700 Subject: [PATCH 06/16] Checkstyle Change-Id: I025a3d1aab863c766f584b162931b9ec56bc79e0 --- .../hadoop/hdds/scm/ha/SCMStateMachine.java | 2 +- .../hdds/scm/ha/TestSCMStateMachine.java | 12 ++++++------ .../om/ratis/OzoneManagerStateMachine.java | 2 +- .../om/ratis/TestOzoneManagerStateMachine.java | 18 +++--------------- 4 files changed, 11 insertions(+), 23 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java index 81fcb70078b0..fa48271a745c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java @@ -438,7 +438,7 @@ public void notifyConfigurationChanged(long term, long index, } metrics.addRatisEvent( "New peers " + newPeerIds + - (newListenersIds.isEmpty()? "" : ", new listeners " + newListenersIds) + + (newListenersIds.isEmpty() ? 
"" : ", new listeners " + newListenersIds) + " added at term index (" + term + ", " + index + ")"); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java index f0be3200547c..6672d764b7c8 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java @@ -17,6 +17,11 @@ package org.apache.hadoop.hdds.scm.ha; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.when; + import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMMetrics; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.utils.TransactionInfo; @@ -25,11 +30,6 @@ import org.junit.jupiter.api.Test; import org.mockito.MockedStatic; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.mockStatic; -import static org.mockito.Mockito.when; - /** * Test SCMStateMachine events recording. 
*/ @@ -48,7 +48,7 @@ public void testRatisEventsRecording() throws Exception { scmStaticMock.when(StorageContainerManager::getMetrics).thenReturn(metrics); stateMachine.notifyConfigurationChanged(1, 1, RaftProtos.RaftConfigurationProto.getDefaultInstance()); - assertTrue(metrics.getRatisEvents().contains("notifyConfigurationChanged")); + assertTrue(metrics.getRatisEvents().contains("New peers [] added at term index")); } metrics.unRegister(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java index ccb469b2d9c9..2abaf9ae5719 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java @@ -314,7 +314,7 @@ public void notifyConfigurationChanged(long term, long index, if (metrics != null) { metrics.addRatisEvent( "New peers " + newPeerIds + - (newListenersIds.isEmpty()? "" : ", new listeners " + newListenersIds) + + (newListenersIds.isEmpty() ? 
"" : ", new listeners " + newListenersIds) + " added at term index (" + term + ", " + index + ")"); } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java index 2734a3f214b4..f4be86fcf588 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java @@ -45,8 +45,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.utils.NettyMetrics; -import org.apache.hadoop.ozone.audit.AuditMessage; import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.Table; @@ -1043,7 +1041,6 @@ private RaftClientRequest buildClientRequest( @Test public void testRatisEventsRecording() { - OzoneManager om = mock(OzoneManager.class); OMMetrics metrics = OMMetrics.create(); when(om.getMetrics()).thenReturn(metrics); when(om.getOmSnapshotManager()).thenReturn(mock(OmSnapshotManager.class)); @@ -1051,25 +1048,16 @@ public void testRatisEventsRecording() { AuditMessage auditMessage = mock(AuditMessage.class); when(auditMessage.getOp()).thenReturn("LEADER_CHANGE"); when(om.buildAuditMessageForSuccess(any(), any())).thenReturn(auditMessage); - - OzoneManagerStateMachine sm = new OzoneManagerStateMachine( - om, - mock(OzoneManagerDoubleBuffer.class), - mock(RequestHandler.class), - mock(ExecutorService.class), - mock(NettyMetrics.class) - ); - sm.notifyLeaderReady(); - assertTrue(metrics.getRatisEvents().contains("notifyLeaderReady")); + assertTrue(metrics.getRatisEvents().contains("Ready to serve requests as the leader")); RaftGroupMemberId groupMemberId 
= mock(RaftGroupMemberId.class); when(groupMemberId.getPeerId()).thenReturn(RaftPeerId.valueOf("peer1")); sm.notifyLeaderChanged(groupMemberId, RaftPeerId.valueOf("peer1")); - assertTrue(metrics.getRatisEvents().contains("notifyLeaderChanged: newLeaderId=peer1")); + assertTrue(metrics.getRatisEvents().contains("Leader changed to peer1")); sm.notifyConfigurationChanged(1, 1, RaftProtos.RaftConfigurationProto.getDefaultInstance()); - assertTrue(metrics.getRatisEvents().contains("notifyConfigurationChanged")); + assertTrue(metrics.getRatisEvents().contains("New peers [] added at term index ")); metrics.unRegister(); } From fc5419f18339f21cee9b34c6899c5379ddbe90d5 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Mon, 13 Apr 2026 21:08:28 -0700 Subject: [PATCH 07/16] HDDS-13133: Improve Ratis event recording in SCM and OM Change-Id: I0a3755dab3744a08f477aa5943df51ae01813465 --- .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 4 + .../placement/metrics/SCMMetrics.java | 25 ++++-- .../hadoop/hdds/scm/ha/SCMStateMachine.java | 76 +++++++------------ .../scm/server/StorageContainerManager.java | 6 +- .../apache/hadoop/ozone/om/OMConfigKeys.java | 4 + .../org/apache/hadoop/ozone/om/OMMetrics.java | 23 ++++-- 6 files changed, 72 insertions(+), 66 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index 2c44fa881c41..4bb4f32d57c8 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -628,6 +628,10 @@ public final class ScmConfigKeys { public static final String OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT = "ozone.scm.ha.raft.server.rpc.first-election.timeout"; + public static final String OZONE_SCM_RATIS_EVENTS_MAX_LIMIT = + "ozone.scm.ratis.events.max.limit"; + public static final int 
OZONE_SCM_RATIS_EVENTS_MAX_LIMIT_DEFAULT = 100; + /** * Never constructed. */ diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java index a0c167ff2e86..25c248739fbf 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java @@ -17,8 +17,10 @@ package org.apache.hadoop.hdds.scm.container.placement.metrics; -import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.utils.DBCheckpointMetrics; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; @@ -36,8 +38,8 @@ public class SCMMetrics { public static final String SOURCE_NAME = SCMMetrics.class.getSimpleName(); - private final List ratisEvents = new ArrayList<>(); - private static final int MAX_RATIS_EVENTS = 100; + private final LinkedList ratisEvents = new LinkedList<>(); + private final int maxRatisEvents; /** * Container stat metrics, the meaning of following metrics @@ -65,14 +67,23 @@ public DBCheckpointMetrics getDBCheckpointMetrics() { return dbCheckpointMetrics; } - public SCMMetrics() { + public SCMMetrics(int maxRatisEvents) { dbCheckpointMetrics = DBCheckpointMetrics.create("SCM Metrics"); + this.maxRatisEvents = maxRatisEvents; } public static SCMMetrics create() { + return create(null); + } + + public static SCMMetrics create(ConfigurationSource conf) { MetricsSystem ms = DefaultMetricsSystem.instance(); + int maxRatisEvents = conf == null + ? 
ScmConfigKeys.OZONE_SCM_RATIS_EVENTS_MAX_LIMIT_DEFAULT + : conf.getInt(ScmConfigKeys.OZONE_SCM_RATIS_EVENTS_MAX_LIMIT, + ScmConfigKeys.OZONE_SCM_RATIS_EVENTS_MAX_LIMIT_DEFAULT); return ms.register(SOURCE_NAME, "Storage Container Manager Metrics", - new SCMMetrics()); + new SCMMetrics(maxRatisEvents)); } public void setLastContainerReportSize(long size) { @@ -163,8 +174,8 @@ public void decrContainerStat(ContainerStat deltaStat) { public void addRatisEvent(String event) { synchronized (ratisEvents) { - if (ratisEvents.size() >= MAX_RATIS_EVENTS) { - ratisEvents.remove(0); + if (ratisEvents.size() >= maxRatisEvents) { + ratisEvents.removeFirst(); } ratisEvents.add(Time.formatTime(Time.now()) + "|" + event); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java index fa48271a745c..fbcf44401b1a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java @@ -23,6 +23,7 @@ import com.google.common.base.Preconditions; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.TextFormat; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; @@ -80,6 +81,7 @@ public class SCMStateMachine extends BaseStateMachine { private StorageContainerManager scm; private Map handlers; private SCMHADBTransactionBuffer transactionBuffer; + private final SCMMetrics metrics; private final SimpleStateMachineStorage storage = new SimpleStateMachineStorage(); private final boolean isInitialized; @@ -96,6 +98,7 @@ public SCMStateMachine(final StorageContainerManager scm, this.scm = scm; this.handlers = new EnumMap<>(RequestType.class); this.transactionBuffer = buffer; + 
this.metrics = StorageContainerManager.getMetrics(); TransactionInfo latestTrxInfo = this.transactionBuffer.getLatestTrxInfo(); if (!latestTrxInfo.isDefault()) { updateLastAppliedTermIndex(latestTrxInfo.getTerm(), @@ -113,12 +116,19 @@ public SCMStateMachine(final StorageContainerManager scm, public SCMStateMachine() { isInitialized = false; + this.metrics = null; } public void registerHandler(RequestType type, Object handler) { handlers.put(type, handler); } + private void addRatisEvent(String message) { + if (metrics != null) { + metrics.addRatisEvent(message); + } + } + @Override public SnapshotInfo getLatestSnapshot() { // Transaction buffer will be null during scm initlialization phase @@ -214,11 +224,9 @@ public void notifyNotLeader(Collection pendingEntries) { if (!isInitialized) { return; } - LOG.info("current leader SCM steps down."); - SCMMetrics metrics = StorageContainerManager.getMetrics(); - if (metrics != null) { - metrics.addRatisEvent("current leader SCM steps down."); - } + String message = "SCM " + scm.getScmId() + " steps down from being leader."; + LOG.info(message); + addRatisEvent(message); scm.getScmContext().updateLeaderAndTerm(false, 0); scm.getSCMServiceManager().notifyStatusChanged(); @@ -249,12 +257,8 @@ public CompletableFuture notifyInstallSnapshotFromLeader( final String leaderNodeId = leaderDetails.get().getNodeId(); LOG.info("Received install snapshot notification from SCM leader: {} with " + "term index: {}", leaderAddress, firstTermIndexInLog); - SCMMetrics metrics = StorageContainerManager.getMetrics(); - if (metrics != null) { - metrics.addRatisEvent( - "Installing snapshot from SCM leader " + leaderNodeId + - ", term index: " + firstTermIndexInLog); - } + addRatisEvent("Installing snapshot from SCM leader " + leaderNodeId + + ", term index: " + firstTermIndexInLog); CompletableFuture future = CompletableFuture.supplyAsync( () -> { @@ -295,7 +299,6 @@ public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, if 
(!isInitialized) { return; } - SCMMetrics metrics = StorageContainerManager.getMetrics(); currentLeaderTerm.set(scm.getScmHAManager().getRatisServer().getDivision() .getInfo().getCurrentTerm()); @@ -309,17 +312,17 @@ public void notifyLeaderChanged(RaftGroupMemberId groupMemberId, } if (!groupMemberId.getPeerId().equals(newLeaderId)) { - LOG.info("leader changed, yet current SCM is still follower."); - if (metrics != null) { - metrics.addRatisEvent("Leader changed to " + newLeaderId + ", yet current SCM is still follower."); - } + String message = "Leader changed to " + newLeaderId + + ", current SCM " + scm.getScmId() + " is still follower."; + LOG.info(message); + addRatisEvent(message); return; } - LOG.info("current SCM becomes leader of term {}.", currentLeaderTerm); - if (metrics != null) { - metrics.addRatisEvent("current SCM becomes leader of term " + currentLeaderTerm); - } + String message = "current SCM " + scm.getScmId() + + " becomes leader of term " + currentLeaderTerm; + LOG.info(message); + addRatisEvent(message); scm.getScmContext().updateLeaderAndTerm(true, currentLeaderTerm.get()); @@ -413,35 +416,15 @@ public void notifyLeaderReady() { scm.getScmContext().setLeaderReady(); scm.getSCMServiceManager().notifyStatusChanged(); scm.getFinalizationManager().onLeaderReady(); - SCMMetrics metrics = StorageContainerManager.getMetrics(); - if (metrics != null) { - metrics.addRatisEvent("Ready to serve requests as the leader"); - } + addRatisEvent("SCM " + scm.getScmId() + + " is ready to serve requests as the leader"); } @Override public void notifyConfigurationChanged(long term, long index, RaftProtos.RaftConfigurationProto newRaftConfiguration) { - SCMMetrics metrics = StorageContainerManager.getMetrics(); - if (metrics != null) { - List newPeers = - newRaftConfiguration.getPeersList(); - List newListeners = - newRaftConfiguration.getListenersList(); - List newPeerIds = new ArrayList<>(); - List newListenersIds = new ArrayList<>(); - for 
(RaftProtos.RaftPeerProto raftPeerProto : newPeers) { - newPeerIds.add(RaftPeerId.valueOf(raftPeerProto.getId()).toString()); - } - for (RaftProtos.RaftPeerProto raftListenerProto : newListeners) { - newListenersIds.add(RaftPeerId.valueOf(raftListenerProto.getId()).toString()); - } - metrics.addRatisEvent( - "New peers " + newPeerIds + - (newListenersIds.isEmpty() ? "" : ", new listeners " + newListenersIds) + - " added at term index (" + - term + ", " + index + ")"); - } + addRatisEvent("Configuration changed at term index (" + term + ", " + index + + ") to " + TextFormat.shortDebugString(newRaftConfiguration)); } @Override @@ -474,10 +457,7 @@ public void reinitialize() throws IOException { } LOG.info("{}: SCMStateMachine is reinitializing. newTermIndex = {}", getId(), termIndex); - SCMMetrics metrics = StorageContainerManager.getMetrics(); - if (metrics != null) { - metrics.addRatisEvent("reinitialize: " + termIndex); - } + addRatisEvent("reinitialize: " + termIndex); // re-initialize the DBTransactionBuffer and update the lastAppliedIndex. try { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 696816b85a24..66d25195c2ab 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -1427,15 +1427,15 @@ private static InetSocketAddress getScmAddress(SCMHANodeDetails haDetails, /** * Initialize SCM metrics. */ - public static void initMetrics() { - metrics = SCMMetrics.create(); + public void initMetrics() { + metrics = SCMMetrics.create(configuration); } /** * Return SCM metrics instance. */ public static SCMMetrics getMetrics() { - return metrics == null ? 
SCMMetrics.create() : metrics; + return metrics; } /** diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 05083d506304..6d83209dd0fd 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -691,6 +691,10 @@ public final class OMConfigKeys { "ozone.om.snapshot.local.data.manager.service.interval"; public static final String OZONE_OM_SNAPSHOT_LOCAL_DATA_MANAGER_SERVICE_INTERVAL_DEFAULT = "5m"; + public static final String OZONE_OM_RATIS_EVENTS_MAX_LIMIT = + "ozone.om.ratis.events.max.limit"; + public static final int OZONE_OM_RATIS_EVENTS_MAX_LIMIT_DEFAULT = 100; + /** * Never constructed. */ diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java index cb3745beb636..3bc049683458 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java @@ -18,9 +18,10 @@ package org.apache.hadoop.ozone.om; import com.google.common.annotations.VisibleForTesting; -import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; import org.apache.hadoop.hdds.annotation.InterfaceAudience; +import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.utils.DBCheckpointMetrics; import org.apache.hadoop.metrics2.MetricsSystem; @@ -29,6 +30,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.ozone.om.OMConfigKeys; import 
org.apache.hadoop.ozone.om.snapshot.OMSnapshotDirectoryMetrics; import org.apache.hadoop.util.Time; @@ -41,8 +43,8 @@ public class OMMetrics implements OmMetadataReaderMetrics { private static final String SOURCE_NAME = OMMetrics.class.getSimpleName(); - private final List ratisEvents = new ArrayList<>(); - private static final int MAX_RATIS_EVENTS = 100; + private final LinkedList ratisEvents = new LinkedList<>(); + private final int maxRatisEvents; // OM request type op metrics private @Metric MutableCounterLong numVolumeOps; @@ -262,15 +264,20 @@ public class OMMetrics implements OmMetadataReaderMetrics { private final DBCheckpointMetrics dbCheckpointMetrics; private OMSnapshotDirectoryMetrics snapshotDirectoryMetrics; - public OMMetrics() { + public OMMetrics(int maxRatisEvents) { dbCheckpointMetrics = DBCheckpointMetrics.create("OM Metrics"); + this.maxRatisEvents = maxRatisEvents; } - public static OMMetrics create() { + public static OMMetrics create(ConfigurationSource conf) { MetricsSystem ms = DefaultMetricsSystem.instance(); + int maxRatisEvents = conf == null + ? 
OMConfigKeys.OZONE_OM_RATIS_EVENTS_MAX_LIMIT_DEFAULT + : conf.getInt(OMConfigKeys.OZONE_OM_RATIS_EVENTS_MAX_LIMIT, + OMConfigKeys.OZONE_OM_RATIS_EVENTS_MAX_LIMIT_DEFAULT); return ms.register(SOURCE_NAME, "Ozone Manager Metrics", - new OMMetrics()); + new OMMetrics(maxRatisEvents)); } public DBCheckpointMetrics getDBCheckpointMetrics() { @@ -1578,8 +1585,8 @@ public void incNumRecoverLeaseFails() { public void addRatisEvent(String event) { synchronized (ratisEvents) { - if (ratisEvents.size() >= MAX_RATIS_EVENTS) { - ratisEvents.remove(0); + if (ratisEvents.size() >= maxRatisEvents) { + ratisEvents.removeFirst(); } ratisEvents.add(Time.formatTime(Time.now()) + "|" + event); } From fee200a228a10f399755033a83f233b8f3e6353e Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Tue, 14 Apr 2026 22:15:35 -0700 Subject: [PATCH 08/16] checkstyle Change-Id: I46f98fee2a70c3324f1feadf1cfa79919dc931d1 --- .../hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java | 1 - .../main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java | 1 - .../src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java | 1 - 3 files changed, 3 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java index 25c248739fbf..d5dab7800c41 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMMetrics.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdds.scm.container.placement.metrics; import java.util.LinkedList; -import java.util.List; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.utils.DBCheckpointMetrics; diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java index fbcf44401b1a..4483ea59903f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java @@ -27,7 +27,6 @@ import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import java.util.ArrayList; import java.util.Collection; import java.util.EnumMap; import java.util.List; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java index 3bc049683458..948817c47ace 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java @@ -19,7 +19,6 @@ import com.google.common.annotations.VisibleForTesting; import java.util.LinkedList; -import java.util.List; import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; From 84714588ccab38b130b196e684ec676881c5ebbb Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Wed, 15 Apr 2026 08:27:39 -0700 Subject: [PATCH 09/16] Fix build Change-Id: I420eb0677d80a6a815958f5ee543ec927f7524de --- .../java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java index 4483ea59903f..fadf1da84da5 100644 --- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java @@ -423,7 +423,7 @@ public void notifyLeaderReady() { public void notifyConfigurationChanged(long term, long index, RaftProtos.RaftConfigurationProto newRaftConfiguration) { addRatisEvent("Configuration changed at term index (" + term + ", " + index + - ") to " + TextFormat.shortDebugString(newRaftConfiguration)); + ") to " + newRaftConfiguration.toString()); } @Override From 99cce18fe732f11123516192622324d3071e7455 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Wed, 15 Apr 2026 08:29:40 -0700 Subject: [PATCH 10/16] checkstyle Change-Id: I8af053c48de2caef4e2975316c145fb60dd610f9 --- .../main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java index fadf1da84da5..dd15cf4f745e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java @@ -23,7 +23,6 @@ import com.google.common.base.Preconditions; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.TextFormat; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; From d5ddbeb741229f4e3b20a0324034911d97da231b Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Wed, 15 Apr 2026 09:04:53 -0700 Subject: [PATCH 11/16] Fix Change-Id: If47aca573086c3c8e2d3729d93e732bc18442dfc --- .../src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index e3ec61c5b469..0bd52f68d875 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -693,7 +693,7 @@ private OzoneManager(OzoneConfiguration conf, StartupOption startupOption) this.isS3MultiTenancyEnabled = OMMultiTenantManager.checkAndEnableMultiTenancy(this, conf); - metrics = OMMetrics.create(); + metrics = OMMetrics.create(conf); omSnapshotIntMetrics = OmSnapshotInternalMetrics.create(); perfMetrics = OMPerformanceMetrics.register(); omDeletionMetrics = DeletingServiceMetrics.create(); From c6d143e04722b8d8130cad23c0c1d5d8ca0d0e73 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Wed, 15 Apr 2026 12:12:48 -0700 Subject: [PATCH 12/16] Fix build error Change-Id: I539f41888732850f667a8f3c484ad8989b68cc11 --- .../hadoop/ozone/om/service/TestRangerBGSyncService.java | 2 +- .../ozone/om/ratis/TestOzoneManagerDoubleBuffer.java | 2 +- .../ratis/TestOzoneManagerDoubleBufferWithOMResponse.java | 2 +- .../ozone/om/ratis/TestOzoneManagerStateMachine.java | 5 +++-- .../ozone/om/request/TestOMClientRequestWithUserInfo.java | 2 +- .../hadoop/ozone/om/request/bucket/TestBucketRequest.java | 2 +- .../om/request/file/TestOMDirectoryCreateRequest.java | 2 +- .../request/file/TestOMDirectoryCreateRequestWithFSO.java | 2 +- .../hadoop/ozone/om/request/key/TestOMKeyRequest.java | 4 ++-- .../om/request/s3/multipart/TestS3MultipartRequest.java | 2 +- .../om/request/s3/security/TestS3GetSecretRequest.java | 2 +- .../om/request/s3/tenant/TestOMTenantCreateRequest.java | 2 +- .../om/request/s3/tenant/TestOMTenantDeleteRequest.java | 2 +- .../ozone/om/request/volume/TestOMVolumeRequest.java | 2 +- .../hadoop/ozone/om/snapshot/TestSnapshotCache.java | 8 +++++--- 
.../hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java | 5 ++++- .../ozone/om/snapshot/TestSnapshotRequestAndResponse.java | 4 ++-- 17 files changed, 28 insertions(+), 22 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRangerBGSyncService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRangerBGSyncService.java index 9f5690e11e4c..a01bcf370484 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRangerBGSyncService.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/service/TestRangerBGSyncService.java @@ -177,7 +177,7 @@ public void setUp() throws IOException { Path metaDirPath = Paths.get(path, "om-meta"); conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, metaDirPath.toString()); - omMetrics = OMMetrics.create(); + omMetrics = OMMetrics.create(conf); conf.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.resolve("om").toAbsolutePath().toString()); // No need to conf.set(OzoneConfigKeys.OZONE_ADMINISTRATORS, ...) 
here diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java index 81a0691a749a..c0ad927bc059 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java @@ -106,8 +106,8 @@ class TestOzoneManagerDoubleBuffer { @BeforeEach public void setup() throws IOException { - OMMetrics omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + OMMetrics omMetrics = OMMetrics.create(ozoneConfiguration); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, tempDir.getAbsolutePath()); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java index 4ba3c6b5d1b7..8b3027d12cd1 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java @@ -91,8 +91,8 @@ public class TestOzoneManagerDoubleBufferWithOMResponse { @BeforeEach public void setup() throws IOException { ozoneManager = mock(OzoneManager.class, withSettings().stubOnly()); - OMMetrics omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + OMMetrics omMetrics = OMMetrics.create(ozoneConfiguration); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration, diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java index f4be86fcf588..111779b95734 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerStateMachine.java @@ -1041,10 +1041,11 @@ private RaftClientRequest buildClientRequest( @Test public void testRatisEventsRecording() { - OMMetrics metrics = OMMetrics.create(); + OzoneConfiguration conf = new OzoneConfiguration(); + OMMetrics metrics = OMMetrics.create(conf); when(om.getMetrics()).thenReturn(metrics); when(om.getOmSnapshotManager()).thenReturn(mock(OmSnapshotManager.class)); - when(om.getConfiguration()).thenReturn(new OzoneConfiguration()); + when(om.getConfiguration()).thenReturn(conf); AuditMessage auditMessage = mock(AuditMessage.class); when(auditMessage.getOp()).thenReturn("LEADER_CHANGE"); when(om.buildAuditMessageForSuccess(any(), any())).thenReturn(auditMessage); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/TestOMClientRequestWithUserInfo.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/TestOMClientRequestWithUserInfo.java index 145ffe9854fe..38036912b96b 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/TestOMClientRequestWithUserInfo.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/TestOMClientRequestWithUserInfo.java @@ -70,8 +70,8 @@ public class TestOMClientRequestWithUserInfo { @BeforeEach public void setup() throws Exception { ozoneManager = mock(OzoneManager.class); - OMMetrics omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + OMMetrics omMetrics = 
OMMetrics.create(ozoneConfiguration); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); OMMetadataManager omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestBucketRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestBucketRequest.java index 018e60633a50..40f54be4cc03 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestBucketRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/bucket/TestBucketRequest.java @@ -59,8 +59,8 @@ public class TestBucketRequest { public void setup() throws Exception { ozoneManager = mock(OzoneManager.class); - omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + omMetrics = OMMetrics.create(ozoneConfiguration); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); when(ozoneManager.getConfiguration()).thenReturn(ozoneConfiguration); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequest.java index 1b096070ea3f..7adf92518d18 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequest.java @@ -92,8 +92,8 @@ public class TestOMDirectoryCreateRequest { @BeforeEach public void setup() throws Exception { ozoneManager = mock(OzoneManager.class); - omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + omMetrics = OMMetrics.create(ozoneConfiguration); 
ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequestWithFSO.java index 164ccf661cce..8f5b6c807caa 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/file/TestOMDirectoryCreateRequestWithFSO.java @@ -92,8 +92,8 @@ public class TestOMDirectoryCreateRequestWithFSO { @BeforeEach public void setup() throws Exception { ozoneManager = mock(OzoneManager.class); - omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + omMetrics = OMMetrics.create(ozoneConfiguration); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); OMRequestTestUtils.configureFSOptimizedPaths(ozoneConfiguration, true); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java index b84294370c58..405cc706ef90 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/key/TestOMKeyRequest.java @@ -136,10 +136,10 @@ public class TestOMKeyRequest { @BeforeEach public void setup() throws Exception { ozoneManager = mock(OzoneManager.class); - omMetrics = OMMetrics.create(); + OzoneConfiguration ozoneConfiguration = getOzoneConfiguration(); + omMetrics = OMMetrics.create(ozoneConfiguration); perfMetrics = 
OMPerformanceMetrics.register(); delMetrics = DeletingServiceMetrics.create(); - OzoneConfiguration ozoneConfiguration = getOzoneConfiguration(); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); ozoneConfiguration.set(OzoneConfigKeys.OZONE_METADATA_DIRS, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartRequest.java index 38749f6812a6..15ed924cc408 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/multipart/TestS3MultipartRequest.java @@ -77,8 +77,8 @@ public class TestS3MultipartRequest { @BeforeEach public void setup() throws Exception { ozoneManager = mock(OzoneManager.class); - omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + omMetrics = OMMetrics.create(ozoneConfiguration); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/security/TestS3GetSecretRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/security/TestS3GetSecretRequest.java index 78ccef961f38..0067b2b38047 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/security/TestS3GetSecretRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/security/TestS3GetSecretRequest.java @@ -133,8 +133,8 @@ public void setUp() throws Exception { when(call.getRemoteUser()).thenReturn(ugiAlice); Server.getCurCall().set(call); - omMetrics = OMMetrics.create(); 
OzoneConfiguration conf = new OzoneConfiguration(); + omMetrics = OMMetrics.create(conf); conf.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); // No need to conf.set(OzoneConfigKeys.OZONE_ADMINISTRATORS, ...) here diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/tenant/TestOMTenantCreateRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/tenant/TestOMTenantCreateRequest.java index 36529d01064c..4f1ff1659c15 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/tenant/TestOMTenantCreateRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/tenant/TestOMTenantCreateRequest.java @@ -72,8 +72,8 @@ public class TestOMTenantCreateRequest { @BeforeEach public void setup() throws Exception { ozoneManager = mock(OzoneManager.class); - omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + omMetrics = OMMetrics.create(ozoneConfiguration); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/tenant/TestOMTenantDeleteRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/tenant/TestOMTenantDeleteRequest.java index 6de4f1d8446d..8accc44fd9d6 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/tenant/TestOMTenantDeleteRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/tenant/TestOMTenantDeleteRequest.java @@ -69,8 +69,8 @@ public class TestOMTenantDeleteRequest { @BeforeEach public void setup() throws Exception { ozoneManager = mock(OzoneManager.class); - omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new 
OzoneConfiguration(); + omMetrics = OMMetrics.create(ozoneConfiguration); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeRequest.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeRequest.java index 36c6034207c0..71cb1b166277 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeRequest.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeRequest.java @@ -61,8 +61,8 @@ public class TestOMVolumeRequest { @BeforeEach public void setup() throws Exception { ozoneManager = mock(OzoneManager.class); - omMetrics = OMMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + omMetrics = OMMetrics.create(ozoneConfiguration); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, folder.toAbsolutePath().toString()); omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration, diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java index ebc78b26c1e8..96fdb35d4679 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java @@ -82,6 +82,7 @@ class TestSnapshotCache { private static IOzoneManagerLock lock; private SnapshotCache snapshotCache; + private OzoneConfiguration conf; private OMMetrics omMetrics; @BeforeAll @@ -94,8 +95,9 @@ static void beforeAll() throws Exception { @BeforeEach void setUp() throws Exception { + conf = new OzoneConfiguration(); // Reset 
cache for each test case - omMetrics = OMMetrics.create(); + omMetrics = OMMetrics.create(conf); // Create a difference mock OmSnapshot instance each time load() is called doAnswer((Answer) invocation -> { final OmSnapshot omSnapshot = mock(OmSnapshot.class); @@ -197,7 +199,7 @@ public void testLockHoldsWriteLock(int numberOfLocks) { @Test public void testLockSupplierReturnsLockWithAnotherLockReleased() { - IOzoneManagerLock ozoneManagerLock = new OzoneManagerLock(new OzoneConfiguration()); + IOzoneManagerLock ozoneManagerLock = new OzoneManagerLock(conf); snapshotCache = new SnapshotCache(cacheLoader, CACHE_SIZE_LIMIT, omMetrics, 50, true, ozoneManagerLock); try (UncheckedAutoCloseableSupplier lockDetails = snapshotCache.lock()) { ozoneManagerLock.acquireWriteLock(VOLUME_LOCK, "vol1"); @@ -460,7 +462,7 @@ void testEviction3WithClose() throws IOException, InterruptedException, TimeoutE @Test @DisplayName("Snapshot operations not blocked during compaction") void testSnapshotOperationsNotBlockedDuringCompaction() throws IOException, InterruptedException, TimeoutException { - omMetrics = OMMetrics.create(); + omMetrics = OMMetrics.create(conf); snapshotCache = new SnapshotCache(cacheLoader, 1, omMetrics, 50, true, lock); final UUID dbKey1 = UUID.randomUUID(); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java index 4da22a90ed85..39be877c1926 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java @@ -188,7 +188,7 @@ public class TestSnapshotDiffManager { private final List snapshotNames = new ArrayList<>(); private final List snapshotInfoList = new ArrayList<>(); private final List snapDiffJobs = new ArrayList<>(); - 
private final OMMetrics omMetrics = OMMetrics.create(); + private OMMetrics omMetrics; @TempDir private File dbDir; @Mock @@ -229,6 +229,9 @@ public void init() throws RocksDBException, IOException, ExecutionException { ExitUtils.disableSystemExit(); ExitUtil.disableSystemExit(); + OzoneConfiguration conf = new OzoneConfiguration(); + omMetrics = OMMetrics.create(conf); + dbOptions = new ManagedDBOptions(); dbOptions.setCreateIfMissing(true); columnFamilyOptions = new ManagedColumnFamilyOptions(); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java index 9c6f033b907b..e3776a5b8372 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotRequestAndResponse.java @@ -142,9 +142,9 @@ protected TestSnapshotRequestAndResponse(boolean isAdmin) { @BeforeEach public void baseSetup() throws Exception { ozoneManager = mock(OzoneManager.class); - omMetrics = OMMetrics.create(); - omSnapshotIntMetrics = OmSnapshotInternalMetrics.create(); OzoneConfiguration ozoneConfiguration = new OzoneConfiguration(); + omMetrics = OMMetrics.create(ozoneConfiguration); + omSnapshotIntMetrics = OmSnapshotInternalMetrics.create(); ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS, testDir.getAbsolutePath()); ozoneConfiguration.set(OzoneConfigKeys.OZONE_METADATA_DIRS, From 661ddc2f1f4287bf8f0f83572f9fe2c7fbcbd0ac Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Wed, 15 Apr 2026 13:07:17 -0700 Subject: [PATCH 13/16] Fix bugs. 
Change-Id: Iedb11b51244968ff4f7c147301af266f926759e4 --- .../org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java | 2 +- .../hadoop/hdds/scm/server/SCMBlockProtocolServer.java | 3 +-- .../hdds/scm/server/StorageContainerManager.java | 10 +++++----- .../hadoop/hdds/scm/TestSCMDbCheckpointServlet.java | 2 +- .../ozone/om/snapshot/TestSnapshotDiffManager.java | 3 +-- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java index dd15cf4f745e..59b56b6dba4a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java @@ -96,7 +96,7 @@ public SCMStateMachine(final StorageContainerManager scm, this.scm = scm; this.handlers = new EnumMap<>(RequestType.class); this.transactionBuffer = buffer; - this.metrics = StorageContainerManager.getMetrics(); + this.metrics = scm.getMetrics(); TransactionInfo latestTrxInfo = this.transactionBuffer.getLatestTrxInfo(); if (!latestTrxInfo.isDefault()) { updateLastAppliedTermIndex(latestTrxInfo.getTerm(), diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index 1fc7c2d96b24..e563c9f08ffc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -25,7 +25,6 @@ import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes.IO_EXCEPTION; import static org.apache.hadoop.hdds.scm.net.NetConstants.NODE_COST_DEFAULT; import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT; -import static 
org.apache.hadoop.hdds.scm.server.StorageContainerManager.getPerfMetrics; import static org.apache.hadoop.hdds.scm.server.StorageContainerManager.startRpcServer; import static org.apache.hadoop.hdds.server.ServerUtils.getRemoteUserName; import static org.apache.hadoop.hdds.server.ServerUtils.updateRPCListenAddress; @@ -108,7 +107,7 @@ public class SCMBlockProtocolServer implements public SCMBlockProtocolServer(OzoneConfiguration conf, StorageContainerManager scm) throws IOException { this.scm = scm; - this.perfMetrics = getPerfMetrics(); + this.perfMetrics = scm.getPerfMetrics(); final int handlerCount = conf.getInt(OZONE_SCM_BLOCK_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_KEY, OZONE_SCM_HANDLER_COUNT_DEFAULT, LOG::info); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 66d25195c2ab..69f7973ff1bd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -226,8 +226,8 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl /** * SCM metrics. */ - private static SCMMetrics metrics; - private static SCMPerformanceMetrics perfMetrics; + private SCMMetrics metrics; + private SCMPerformanceMetrics perfMetrics; private SCMHAMetrics scmHAMetrics; private final NettyMetrics nettyMetrics; @@ -1434,21 +1434,21 @@ public void initMetrics() { /** * Return SCM metrics instance. */ - public static SCMMetrics getMetrics() { + public SCMMetrics getMetrics() { return metrics; } /** * Initialize SCMPerformance metrics. */ - public static void initPerfMetrics() { + public void initPerfMetrics() { perfMetrics = SCMPerformanceMetrics.create(); } /** * Return SCMPerformance metrics instance. 
*/ - public static SCMPerformanceMetrics getPerfMetrics() { + public SCMPerformanceMetrics getPerfMetrics() { return perfMetrics == null ? SCMPerformanceMetrics.create() : perfMetrics; } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMDbCheckpointServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMDbCheckpointServlet.java index cb8045e0e6e5..7054e350682f 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMDbCheckpointServlet.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestSCMDbCheckpointServlet.java @@ -87,7 +87,7 @@ public void init() throws Exception { .build(); cluster.waitForClusterToBeReady(); StorageContainerManager scm = cluster.getStorageContainerManager(); - scmMetrics = StorageContainerManager.getMetrics(); + scmMetrics = scm.getMetrics(); requestMock = mock(HttpServletRequest.class); when(requestMock.getParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH)) diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java index 39be877c1926..9b573a574229 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java @@ -188,7 +188,6 @@ public class TestSnapshotDiffManager { private final List snapshotNames = new ArrayList<>(); private final List snapshotInfoList = new ArrayList<>(); private final List snapDiffJobs = new ArrayList<>(); - private OMMetrics omMetrics; @TempDir private File dbDir; @Mock @@ -230,7 +229,7 @@ public void init() throws RocksDBException, IOException, ExecutionException { ExitUtil.disableSystemExit(); OzoneConfiguration conf = new OzoneConfiguration(); - 
omMetrics = OMMetrics.create(conf); + OMMetrics omMetrics = OMMetrics.create(conf); dbOptions = new ManagedDBOptions(); dbOptions.setCreateIfMissing(true); From 22f30f4517c437f84e901d07a9906dbc945ea805 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Wed, 15 Apr 2026 13:30:19 -0700 Subject: [PATCH 14/16] Fix compilation errors. Change-Id: I1929c7d55a423d82fe1b9efbbb31c0da6cd1ed26 --- .../hadoop/hdds/scm/ha/TestSCMStateMachine.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java index 6672d764b7c8..828606f2d424 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMStateMachine.java @@ -19,7 +19,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.when; import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMMetrics; @@ -28,7 +27,6 @@ import org.apache.ratis.proto.RaftProtos; import org.apache.ratis.server.protocol.TermIndex; import org.junit.jupiter.api.Test; -import org.mockito.MockedStatic; /** * Test SCMStateMachine events recording. 
@@ -39,17 +37,15 @@ public class TestSCMStateMachine { public void testRatisEventsRecording() throws Exception { StorageContainerManager scm = mock(StorageContainerManager.class); SCMMetrics metrics = SCMMetrics.create(); + when(scm.getMetrics()).thenReturn(metrics); + SCMHADBTransactionBuffer buffer = mock(SCMHADBTransactionBuffer.class); when(buffer.getLatestTrxInfo()).thenReturn(TransactionInfo.valueOf(TermIndex.valueOf(0, 0))); SCMStateMachine stateMachine = new SCMStateMachine(scm, buffer); - try (MockedStatic scmStaticMock = mockStatic(StorageContainerManager.class)) { - scmStaticMock.when(StorageContainerManager::getMetrics).thenReturn(metrics); - - stateMachine.notifyConfigurationChanged(1, 1, RaftProtos.RaftConfigurationProto.getDefaultInstance()); - assertTrue(metrics.getRatisEvents().contains("New peers [] added at term index")); - } + stateMachine.notifyConfigurationChanged(1, 1, RaftProtos.RaftConfigurationProto.getDefaultInstance()); + assertTrue(metrics.getRatisEvents().contains("Configuration changed at term index")); metrics.unRegister(); } From 237733bf23ce219f62ec867bf14b2c4bf9d3b35d Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Wed, 15 Apr 2026 17:59:29 -0700 Subject: [PATCH 15/16] Add configurations Change-Id: I21454c2ad7428e41745bba06b0c4a346b2773c15 --- .../common/src/main/resources/ozone-default.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index e159eb6948b8..45207cb96fea 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -5044,4 +5044,16 @@ 5m Interval for cleaning up orphan snapshot local data versions corresponding to snapshots + + ozone.scm.ratis.events.max.limit + 100 + OZONE, RATIS, SCM + The maximum number of most recent Ratis state machine events kept in SCM metrics; once the limit is reached, the oldest event is discarded.
+ + + ozone.om.ratis.events.max.limit + 100 + OZONE, RATIS, OM + The maximum number of most recent Ratis state machine events kept in OM metrics. + From d0750f1d9c57da3afc8e04b5e5971cc4e68fc270 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Thu, 16 Apr 2026 13:54:41 -0700 Subject: [PATCH 16/16] Fix checkstyle in OMMetrics.java Change-Id: Ib16889a190f114d19d95b628430d2cd78604fd84 --- .../src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java index 948817c47ace..5a70483b2bc8 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java @@ -29,7 +29,6 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; -import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.snapshot.OMSnapshotDirectoryMetrics; import org.apache.hadoop.util.Time;