From f7dab10c459c9d8d3c0352050501c9dedc4f7d2e Mon Sep 17 00:00:00 2001 From: William Roebuck <244554584+WilliamRoebuck@users.noreply.github.com> Date: Wed, 27 May 2026 16:42:26 +0100 Subject: [PATCH 1/2] Protect nodeExecuted with a mutex --- .../src/process_group_manager/details/graph.cpp | 14 +++++++++++--- .../src/process_group_manager/details/graph.hpp | 5 +++++ .../complex_monitoring/control_client_mock.cpp | 3 --- .../crash_on_startup/control_client_mock.cpp | 4 ---- .../control_client_mock.cpp | 4 ---- tests/integration/smoke/control_daemon_mock.cpp | 4 ---- 6 files changed, 16 insertions(+), 18 deletions(-) diff --git a/score/launch_manager/daemon/src/process_group_manager/details/graph.cpp b/score/launch_manager/daemon/src/process_group_manager/details/graph.cpp index 2750ea5f..c5ff0e7c 100644 --- a/score/launch_manager/daemon/src/process_group_manager/details/graph.cpp +++ b/score/launch_manager/daemon/src/process_group_manager/details/graph.cpp @@ -275,7 +275,10 @@ bool Graph::startTransition(ProcessGroupStateID pg_state) if (nullptr != process_index_list) { - setState(GraphState::kInTransition); + { + std::shared_lock lock(transition_completion_mutex_); + setState(GraphState::kInTransition); + } if (GraphState::kInTransition == getState()) { @@ -315,7 +318,10 @@ bool Graph::startTransitionToOffState() requested_state_.pg_state_name_ = off_state_; } bool result = false; - setState(GraphState::kInTransition); + { + std::shared_lock lock(transition_completion_mutex_); + setState(GraphState::kInTransition); + } if (GraphState::kInTransition == getState()) { std::vector empty_list{}; @@ -327,6 +333,8 @@ bool Graph::startTransitionToOffState() void Graph::nodeExecuted() { + std::unique_lock lock(transition_completion_mutex_); + GraphState current_state = getState(); if (current_state == GraphState::kInTransition) @@ -390,7 +398,6 @@ inline void Graph::handleNonTransitionExecution(GraphState current_state) << (static_cast(clock()) / (static_cast(CLOCKS_PER_SEC) 
/ 1000.0)) << "ms"; } setState(GraphState::kUndefinedState); - if (current_state == GraphState::kAborting) { setPendingEvent(abort_code_); @@ -414,6 +421,7 @@ void Graph::abort(uint32_t code, ControlClientCode reason) void Graph::cancel() { + std::shared_lock lock(transition_completion_mutex_); setState(GraphState::kCancelled); if (getState() == GraphState::kCancelled) diff --git a/score/launch_manager/daemon/src/process_group_manager/details/graph.hpp b/score/launch_manager/daemon/src/process_group_manager/details/graph.hpp index 234cab63..6fef598e 100644 --- a/score/launch_manager/daemon/src/process_group_manager/details/graph.hpp +++ b/score/launch_manager/daemon/src/process_group_manager/details/graph.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -415,6 +416,10 @@ class Graph final { /// @brief Mutex protecting concurrent access to requested_state_.pg_state_name_ mutable std::mutex requested_state_mutex_{}; + /// @brief Mutex protecting new transitions from interferring with concluding transitions. + /// This enforces that, when a transition has completed successfully, it cannot then be cancelled. + std::shared_mutex transition_completion_mutex_; + /// @brief Pointer to the ProcessGroupManager. 
ProcessGroupManager* pgm_; diff --git a/tests/integration/complex_monitoring/control_client_mock.cpp b/tests/integration/complex_monitoring/control_client_mock.cpp index 598d6449..94c46e84 100644 --- a/tests/integration/complex_monitoring/control_client_mock.cpp +++ b/tests/integration/complex_monitoring/control_client_mock.cpp @@ -29,9 +29,6 @@ TEST(ComplexMonitoring, ControlClientMock) auto result = score::mw::lifecycle::LifecycleClient{}.ReportExecutionState(score::mw::lifecycle::ExecutionState::kRunning); ASSERT_TRUE(result.has_value()) << "ReportExecutionState() failed: " << result.error().Message(); } - // We have to wait for the initial state transition to fully complete, otherwise unexpected failures can occur - // Tracked in https://github.com/eclipse-score/lifecycle/issues/198 - sleep(1); TEST_STEP("Launch monitored process") { diff --git a/tests/integration/crash_on_startup/control_client_mock.cpp b/tests/integration/crash_on_startup/control_client_mock.cpp index d4a8a651..ba49f4bd 100644 --- a/tests/integration/crash_on_startup/control_client_mock.cpp +++ b/tests/integration/crash_on_startup/control_client_mock.cpp @@ -30,10 +30,6 @@ TEST(CrashOnStartup, ControlClientMock) ASSERT_TRUE(result.has_value()) << "ReportExecutionState() failed: " << result.error().Message(); } - // We have to wait for the initial state transition to fully complete, otherwise unexpected failures can occur - // Tracked in https://github.com/eclipse-score/lifecycle/issues/198 - sleep(1); - // Given a process that crashes on startup twice TEST_STEP("Launch process crashing on startup twice") { diff --git a/tests/integration/process_crash_monitoring/control_client_mock.cpp b/tests/integration/process_crash_monitoring/control_client_mock.cpp index 01c179eb..827a0a26 100644 --- a/tests/integration/process_crash_monitoring/control_client_mock.cpp +++ b/tests/integration/process_crash_monitoring/control_client_mock.cpp @@ -34,10 +34,6 @@ TEST(ProcessCrashMonitoring, ControlClientMock) 
auto result = score::mw::lifecycle::LifecycleClient{}.ReportExecutionState(score::mw::lifecycle::ExecutionState::kRunning); ASSERT_TRUE(result.has_value()) << "ReportExecutionState() failed: " << result.error().Message(); } - - // We have to wait for the initial state transition to fully complete, otherwise unexpected failures can occur - // Tracked in https://github.com/eclipse-score/lifecycle/issues/198 - sleep(1); TEST_STEP("Start crashing process") { diff --git a/tests/integration/smoke/control_daemon_mock.cpp b/tests/integration/smoke/control_daemon_mock.cpp index 5d4bb426..598f1c2d 100644 --- a/tests/integration/smoke/control_daemon_mock.cpp +++ b/tests/integration/smoke/control_daemon_mock.cpp @@ -31,10 +31,6 @@ TEST(Smoke, Daemon) ASSERT_TRUE(result.has_value()) << "client.ReportExecutionState() failed: " << result.error().Message(); } - // We have to wait for the initial state transition to fully complete, otherwise unexpected failures can occur - // Tracked in https://github.com/eclipse-score/lifecycle/issues/198 - sleep(1); - TEST_STEP("Activate RunTarget Running") { score::cpp::stop_token stop_token; From 436e7e64a9bc716d564a6c78a632d3185d5b67d5 Mon Sep 17 00:00:00 2001 From: William Roebuck <244554584+WilliamRoebuck@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:40:02 +0100 Subject: [PATCH 2/2] Fix typo --- .../daemon/src/process_group_manager/details/graph.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/score/launch_manager/daemon/src/process_group_manager/details/graph.hpp b/score/launch_manager/daemon/src/process_group_manager/details/graph.hpp index 6fef598e..f1a8f2fc 100644 --- a/score/launch_manager/daemon/src/process_group_manager/details/graph.hpp +++ b/score/launch_manager/daemon/src/process_group_manager/details/graph.hpp @@ -416,7 +416,7 @@ class Graph final { /// @brief Mutex protecting concurrent access to requested_state_.pg_state_name_ mutable std::mutex requested_state_mutex_{}; - /// @brief Mutex protecting 
new transitions from interferring with concluding transitions. + /// @brief Mutex preventing new transitions from interfering with concluding transitions. /// This enforces that, when a transition has completed successfully, it cannot then be cancelled. std::shared_mutex transition_completion_mutex_;