From e76c0671217bd7fe91f77c1e93e0e182514d93e6 Mon Sep 17 00:00:00 2001 From: Teemu Ollakka Date: Tue, 30 Jun 2026 20:01:08 +0300 Subject: [PATCH] MDEV-40222 Prevent MTR hang when waiting for wsrep_ready A query against a server that is up but wedged can connect yet never return, so the loop-count bound in wait_wsrep_ready() did not actually limit the wait and MTR could hang until the suite timeout fired. Add an optional $timeout to run_query_output(): the mysql client is now spawned via My::SafeProcess->new and waited for with wait_one($timeout), killing the client and returning non-zero if it does not finish in time. Bound wait_wsrep_ready() by a wall-clock deadline (start_timer) instead of a loop count, and pass the remaining time to each query so no single hung client can exceed the overall server startup budget. --- mysql-test/mariadb-test-run.pl | 37 ++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/mysql-test/mariadb-test-run.pl b/mysql-test/mariadb-test-run.pl index 2a701794ba320..7461c2aa0d0df 100755 --- a/mysql-test/mariadb-test-run.pl +++ b/mysql-test/mariadb-test-run.pl @@ -5439,8 +5439,13 @@ ($$) # Run a query against a server using mysql client. The output of # the query will be written into outfile. # +# If $timeout (seconds) is given, the client is not waited for +# indefinitely: a server stuck in an unstable state can accept the +# connection but never answer the query, which would otherwise block forever. +# In that case the client is killed and a non-zero status is returned. +# sub run_query_output { - my ($mysqld, $query, $outfile)= @_; + my ($mysqld, $query, $outfile, $timeout)= @_; my $args; mtr_init_args(\$args); @@ -5449,7 +5454,7 @@ sub run_query_output { mtr_add_arg($args, "--silent"); mtr_add_arg($args, "--execute=%s", $query); - my $res= My::SafeProcess->run + my $proc= My::SafeProcess->new ( name => "run_query_output -> ".$mysqld->name(), path => $exe_mysql, @@ -5458,7 +5463,15 @@ sub run_query_output { error => $outfile ); - return $res + # wait_one() returns 1 while the process is still running, + # in which case we kill the hung client. + if ($proc->wait_one($timeout)) + { + $proc->kill(); + return 1; + } + + return $proc->exit_status(); } @@ -5476,7 +5489,13 @@ ($$) my ($tinfo, $mysqld)= @_; my $sleeptime= 100; # Milliseconds - my $loops= ($opt_start_timeout * 1000) / $sleeptime; + + # Bound the whole wait by the server startup timeout. This must be a + # wall-clock deadline rather than a simple loop count: a single query + # against a wedged server can block indefinitely, which would otherwise + # defeat the loop bound and hang MTR until the surrounding suite timeout + # fires. + my $timeout= start_timer($opt_start_timeout); my $name= $mysqld->name(); my $outfile= "$opt_vardir/tmp/$name.wsrep_ready"; @@ -5485,11 +5504,17 @@ ($$) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready'"; - for (my $loop= 1; $loop <= $loops; $loop++) + while (1) { + # Cap each query by the time left so a hung client cannot exceed the + # overall startup budget. Integer seconds, and at least 1 (wait_one() + # treats 0 as a non-blocking poll). + my $remaining= int($timeout - time); + last if $remaining <= 0; + # Careful... if MTR runs with option 'verbose' then the # file contains also SafeProcess verbose output - if (run_query_output($mysqld, $query, $outfile) == 0 && + if (run_query_output($mysqld, $query, $outfile, $remaining) == 0 && mtr_grab_file($outfile) =~ /WSREP_READY\s+ON/) { unlink($outfile);