From 9ef693b8de4d335af7be809f076800906088b1e1 Mon Sep 17 00:00:00 2001 From: Tim Conley Date: Thu, 2 Apr 2026 10:49:52 -0700 Subject: [PATCH 1/7] Debugging cloud test failure --- .github/workflows/ci.yml | 2 +- temporalio/client.py | 1 + tests/worker/test_activity.py | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1223cf44..c9d000d20 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ env: jobs: # Build and test the project build-lint-test: - timeout-minutes: 30 + timeout-minutes: 35 strategy: fail-fast: false matrix: diff --git a/temporalio/client.py b/temporalio/client.py index cc2750ec6..40a5a023c 100644 --- a/temporalio/client.py +++ b/temporalio/client.py @@ -2963,6 +2963,7 @@ async def result( rpc_metadata=rpc_metadata, rpc_timeout=rpc_timeout, ): + print("Getting history event:", event) if event.HasField("workflow_execution_completed_event_attributes"): complete_attr = event.workflow_execution_completed_event_attributes # Follow execution diff --git a/tests/worker/test_activity.py b/tests/worker/test_activity.py index 99efdf30f..80b5a2987 100644 --- a/tests/worker/test_activity.py +++ b/tests/worker/test_activity.py @@ -194,6 +194,7 @@ async def capture_info() -> None: nonlocal info info = activity.info() + print("Executing workflow") result = await _execute_workflow_with_activity( client, worker, @@ -201,6 +202,7 @@ async def capture_info() -> None: start_to_close_timeout_ms=4000, shared_state_manager=shared_state_manager, ) + print("Executed workflow") assert info assert info.activity_id # type:ignore[reportUnreachable] @@ -1621,12 +1623,14 @@ async def _execute_workflow_with_activity( task_queue=worker.task_queue, result_type=result_type_override, ) + print("Constructing activity result") return _ActivityResult( act_task_queue=worker_config["task_queue"], result=await handle.result(), handle=handle, ) finally: + print("On complete.") if on_complete: on_complete() From fcf76bc139ea3f22102437b01b27b3bd4db2693c Mon Sep 17 00:00:00 2001 From: Tim Conley Date: Thu, 2 Apr 2026 11:03:19 -0700 Subject: [PATCH 2/7] Debugging cloud test failure --- temporalio/worker/_worker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/temporalio/worker/_worker.py b/temporalio/worker/_worker.py index 332e2ead7..873db87e0 100644 --- a/temporalio/worker/_worker.py +++ b/temporalio/worker/_worker.py @@ -836,6 +836,8 @@ async def raise_on_shutdown(): f"Beginning worker shutdown, will wait {graceful_timeout} before cancelling activities" ) + print("Initiate bridge worker shutdown") + # Initiate core worker shutdown self._bridge_worker.initiate_shutdown() @@ -869,6 +871,8 @@ async def raise_on_shutdown(): if self._nexus_worker: await self._nexus_worker.wait_all_completed() + print("Finalize bridge worker shutdown") + # Do final shutdown try: await self._bridge_worker.finalize_shutdown() From 8c2d07105efce4ee147a9782bbeccd6513349a2b Mon Sep 17 00:00:00 2001 From: Tim Conley Date: Thu, 2 Apr 2026 11:16:59 -0700 Subject: [PATCH 3/7] Debugging cloud test failure --- temporalio/worker/_worker.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/temporalio/worker/_worker.py b/temporalio/worker/_worker.py index 873db87e0..91c79b76b 100644 --- a/temporalio/worker/_worker.py +++ b/temporalio/worker/_worker.py @@ -846,6 +846,8 @@ async def raise_on_shutdown(): if worker and task.done() and task.exception(): tasks[worker] = asyncio.create_task(worker.drain_poll_queue()) + print("Exception tasks replaced") + # Notify shutdown occurring if self._activity_worker: self._activity_worker.notify_shutdown() @@ -854,8 +856,13 @@ async def raise_on_shutdown(): if self._nexus_worker: self._nexus_worker.notify_shutdown() + print("Workers notified") + # Wait for all tasks to complete (i.e. for poller loops to stop) await asyncio.wait(tasks.values()) + + print("Tasks drained: ", tasks) + # Sometimes both workers throw an exception and since we only take the # first, Python may complain with "Task exception was never retrieved" # if we don't get the others. Therefore we call cancel on each task @@ -863,11 +870,16 @@ async def raise_on_shutdown(): for task in tasks.values(): task.cancel() + print("Tasks cancelled") + # Let all activity / nexus operations completions finish. We cannot guarantee that # because poll shutdown completed (which means activities/operations completed) # that they got flushed to the server. if self._activity_worker: await self._activity_worker.wait_all_completed() + + print("Activitiy worker waited") + if self._nexus_worker: await self._nexus_worker.wait_all_completed() From 9d13c89b2ae873e6c25d0df89cd6d140e913c627 Mon Sep 17 00:00:00 2001 From: Tim Conley Date: Thu, 2 Apr 2026 11:28:34 -0700 Subject: [PATCH 4/7] Debugging cloud test failure --- temporalio/worker/_workflow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/temporalio/worker/_workflow.py b/temporalio/worker/_workflow.py index fb104b414..efc251dc3 100644 --- a/temporalio/worker/_workflow.py +++ b/temporalio/worker/_workflow.py @@ -210,8 +210,10 @@ async def run( for t in asyncio.all_tasks() if getattr(t, "__temporal_task_tag", None) is task_tag ] + print("Waiting for tasks to finish: ", our_tasks) if our_tasks: await asyncio.wait(our_tasks) + print("Waited for tasks to finish: ", our_tasks) # Shutdown the thread pool executor if we created it if not self._workflow_task_executor_user_provided: self._workflow_task_executor.shutdown() From 0c2f43b3718ae0f9449b33317408f73c61699d3e Mon Sep 17 00:00:00 2001 From: Tim Conley Date: Thu, 2 Apr 2026 12:01:21 -0700 Subject: [PATCH 5/7] Debugging cloud test failure --- tests/worker/test_activity.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/worker/test_activity.py b/tests/worker/test_activity.py index 80b5a2987..7d8c61938 100644 --- a/tests/worker/test_activity.py +++ b/tests/worker/test_activity.py @@ -169,6 +169,7 @@ def some_activity() -> None: assert saw_error +@pytest.mark.timeout(120) async def test_activity_info( client: Client, worker: ExternalWorker, From 4565b108e10f56e31952c64ec747157fb32f3a1f Mon Sep 17 00:00:00 2001 From: Tim Conley Date: Thu, 2 Apr 2026 13:32:13 -0700 Subject: [PATCH 6/7] Debugging cloud test failure --- temporalio/runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/temporalio/runtime.py b/temporalio/runtime.py index b151f96f0..c6f3e6aac 100644 --- a/temporalio/runtime.py +++ b/temporalio/runtime.py @@ -217,7 +217,7 @@ def _to_bridge_config(self) -> temporalio.bridge.runtime.LoggingConfig: LoggingConfig.default = LoggingConfig( - filter=TelemetryFilter(core_level="WARN", other_level="ERROR") + filter=TelemetryFilter(core_level="DEBUG", other_level="ERROR") ) _module_start_time = time.time() From ff1d0bfb28eb0959896bc1c128073518d9c15dd1 Mon Sep 17 00:00:00 2001 From: Tim Conley Date: Thu, 2 Apr 2026 13:32:39 -0700 Subject: [PATCH 7/7] Debugging cloud test failure --- tests/worker/test_activity.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/worker/test_activity.py b/tests/worker/test_activity.py index 7d8c61938..80b5a2987 100644 --- a/tests/worker/test_activity.py +++ b/tests/worker/test_activity.py @@ -169,7 +169,6 @@ def some_activity() -> None: assert saw_error -@pytest.mark.timeout(120) async def test_activity_info( client: Client, worker: ExternalWorker,