From 7222be21f1edca3afedcb8e40e040d8f0445d596 Mon Sep 17 00:00:00 2001 From: Ivan Barba Date: Thu, 30 Apr 2026 18:32:10 +0000 Subject: [PATCH 1/4] Add butler command to run uworker preprocess locally --- butler.py | 14 +++++++ src/local/butler/uworker_preprocess.py | 56 ++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 src/local/butler/uworker_preprocess.py diff --git a/butler.py b/butler.py index 29b23c3740..019704a07f 100644 --- a/butler.py +++ b/butler.py @@ -354,6 +354,19 @@ def _add_schedule_tworker_fuzz_subparser(toplevel_subparsers): help='Script specific arguments (FUZZER_NAME JOB_NAME)') +def _add_uworker_preprocess_subparser(toplevel_subparsers): + """Adds a parser for the `uworker_preprocess` command.""" + parser = toplevel_subparsers.add_parser( + 'uworker_preprocess', + help='Run the preprocess step of a fuzz task locally.') + parser.add_argument( + '--fuzzer', required=True, help='The name of the fuzzer.') + parser.add_argument( + '--job', required=True, help='The job name.') + parser.add_argument( + '-c', '--config-dir', required=True, help='Path to application config.') + + def _add_reproduce_subparser(toplevel_subparsers): """Adds a parser for the `reproduce` command.""" parser = toplevel_subparsers.add_parser( @@ -491,6 +504,7 @@ def main(): _add_weights_subparser(subparsers) _add_reproduce_subparser(subparsers) _add_schedule_tworker_fuzz_subparser(subparsers) + _add_uworker_preprocess_subparser(subparsers) args = parser.parse_args() if not args.command: parser.print_help() diff --git a/src/local/butler/uworker_preprocess.py b/src/local/butler/uworker_preprocess.py new file mode 100644 index 0000000000..b72026129d --- /dev/null +++ b/src/local/butler/uworker_preprocess.py @@ -0,0 +1,56 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""uworker_preprocess.py runs the preprocess step of a fuzz task locally.""" + +import os +import sys + +from clusterfuzz._internal.bot.tasks import utasks +from clusterfuzz._internal.bot.tasks.utasks import fuzz_task +from clusterfuzz._internal.config import local_config +from clusterfuzz._internal.datastore import ndb_init +from clusterfuzz._internal.system import environment +from clusterfuzz._internal.metrics import logs + + +def execute(args): + """Executes the uworker_preprocess command.""" + sys.path.insert(0, os.path.abspath(os.path.join('src', 'appengine'))) + sys.path.insert( + 0, os.path.abspath(os.path.join('src', 'appengine', 'third_party'))) + + os.environ['CONFIG_DIR_OVERRIDE'] = os.path.abspath(os.path.expanduser(args.config_dir)) + local_config.ProjectConfig().set_environment() + + # We want to act as a tworker, so we configure logs accordingly. + logs.configure('run_bot') + + print(f'Running preprocess for fuzzer: {args.fuzzer}, job: {args.job}') + + with ndb_init.context(): + uworker_env = {} + + environment.set_value('TASK_NAME', 'fuzz') + environment.set_value('JOB_NAME', args.job) + + # tworker_preprocess expects: (module, task_argument, job_type, uworker_env) + # For fuzz task, task_argument is fuzzer_name. + result = utasks.tworker_preprocess(fuzz_task, args.fuzzer, args.job, uworker_env) + + if result: + download_url, _ = result + print(f'\nPreprocess successful!') + print(f'Input Download URL: {download_url}') + else: + print('\nPreprocess failed or returned no result.') From 618c19ffd2ad16eaf2e328ec515dc83fb931d842 Mon Sep 17 00:00:00 2001 From: Ivan Barba Date: Tue, 5 May 2026 03:49:38 +0000 Subject: [PATCH 2/4] Implement environment preparation in uworker preprocess --- src/local/butler/uworker_preprocess.py | 77 +++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 9 deletions(-) diff --git a/src/local/butler/uworker_preprocess.py b/src/local/butler/uworker_preprocess.py index b72026129d..ef5905d130 100644 --- a/src/local/butler/uworker_preprocess.py +++ b/src/local/butler/uworker_preprocess.py @@ -15,34 +15,93 @@ import os import sys +import uuid from clusterfuzz._internal.bot.tasks import utasks from clusterfuzz._internal.bot.tasks.utasks import fuzz_task from clusterfuzz._internal.config import local_config +from clusterfuzz._internal.datastore import data_types from clusterfuzz._internal.datastore import ndb_init from clusterfuzz._internal.system import environment from clusterfuzz._internal.metrics import logs -def execute(args): - """Executes the uworker_preprocess command.""" +def _get_job_environment(job_name): + """Fetches the job entity and returns its environment variables.""" + job = data_types.Job.query(data_types.Job.name == job_name).get() + if job: + return job.get_environment() + else: + print(f"Error: Job {job_name} not found in Datastore.") + sys.exit(1) + + +def _get_fuzzer_environment(fuzzer_name, job_name): + """Fetches fuzzer entity and returns its additional environment variables.""" + if environment.is_engine_fuzzer_job(job_name): + return {} + + fuzzer = data_types.Fuzzer.query(data_types.Fuzzer.name == fuzzer_name).get() + if not fuzzer: + print(f"Error: Fuzzer {fuzzer_name} not found in Datastore.") + sys.exit(1) + + + additional_default_variables = '' + additional_variables_for_job = '' + + if fuzzer.additional_environment_string: + for env_variable_line in fuzzer.additional_environment_string.splitlines(): + if '=' in env_variable_line and ':' in env_variable_line.split('=', 1)[0]: + fuzzer_job_name, environment_definition = env_variable_line.split(':', 1) + if fuzzer_job_name == job_name: + additional_variables_for_job += '\n%s' % environment_definition + continue + additional_default_variables += '\n%s' % env_variable + + env_string = additional_default_variables + additional_variables_for_job + return environment.parse_environment_definition(env_string) + + +def _get_uworker_env(args): + """Prepares the complete environment variables for the payload.""" + uworker_env = _get_job_environment(args.job) + uworker_env.update(_get_fuzzer_environment(args.fuzzer, args.job)) + + # Replicate what process_command_impl does in a real tworker + uworker_env['TASK_NAME'] = 'fuzz' + uworker_env['TASK_ARGUMENT'] = args.fuzzer + uworker_env['JOB_NAME'] = args.job + + # Add logging metadata to be carried over to uworker_main + uworker_env['CF_TASK_NAME'] = 'fuzz' + uworker_env['CF_TASK_JOB_NAME'] = args.job + uworker_env['CF_TASK_ARGUMENT'] = args.fuzzer + uworker_env['CF_TASK_ID'] = str(uuid.uuid4()) + + return uworker_env + + +def _early_setup(args): + """Early setup needed for config and logs.""" sys.path.insert(0, os.path.abspath(os.path.join('src', 'appengine'))) sys.path.insert( 0, os.path.abspath(os.path.join('src', 'appengine', 'third_party'))) - os.environ['CONFIG_DIR_OVERRIDE'] = os.path.abspath(os.path.expanduser(args.config_dir)) + environment.set_value('CONFIG_DIR_OVERRIDE', os.path.abspath(os.path.expanduser(args.config_dir))) + environment.set_value('LOG_TO_CONSOLE', True) local_config.ProjectConfig().set_environment() - - # We want to act as a tworker, so we configure logs accordingly. logs.configure('run_bot') + + +def execute(args): + """Executes the uworker_preprocess command.""" + _early_setup(args) print(f'Running preprocess for fuzzer: {args.fuzzer}, job: {args.job}') with ndb_init.context(): - uworker_env = {} - - environment.set_value('TASK_NAME', 'fuzz') - environment.set_value('JOB_NAME', args.job) + uworker_env = _get_uworker_env(args) # tworker_preprocess expects: (module, task_argument, job_type, uworker_env) # For fuzz task, task_argument is fuzzer_name. From a30f9a15224fb5b39dd98f91b8e1db41cd92670b Mon Sep 17 00:00:00 2001 From: Ivan Barba Date: Tue, 5 May 2026 03:55:38 +0000 Subject: [PATCH 3/4] Rename script to just "Preprocess" --- butler.py | 8 ++++---- .../{uworker_preprocess.py => preprocess.py} | 18 ++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) rename src/local/butler/{uworker_preprocess.py => preprocess.py} (86%) diff --git a/butler.py b/butler.py index 019704a07f..ccb345af84 100644 --- a/butler.py +++ b/butler.py @@ -354,10 +354,10 @@ def _add_schedule_tworker_fuzz_subparser(toplevel_subparsers): help='Script specific arguments (FUZZER_NAME JOB_NAME)') -def _add_uworker_preprocess_subparser(toplevel_subparsers): - """Adds a parser for the `uworker_preprocess` command.""" +def _add_preprocess_subparser(toplevel_subparsers): + """Adds a parser for the `preprocess` command.""" parser = toplevel_subparsers.add_parser( - 'uworker_preprocess', + 'preprocess', help='Run the preprocess step of a fuzz task locally.') parser.add_argument( '--fuzzer', required=True, help='The name of the fuzzer.') @@ -504,7 +504,7 @@ def main(): _add_weights_subparser(subparsers) _add_reproduce_subparser(subparsers) _add_schedule_tworker_fuzz_subparser(subparsers) - _add_uworker_preprocess_subparser(subparsers) + _add_preprocess_subparser(subparsers) args = parser.parse_args() if not args.command: parser.print_help() diff --git a/src/local/butler/uworker_preprocess.py b/src/local/butler/preprocess.py similarity index 86% rename from src/local/butler/uworker_preprocess.py rename to src/local/butler/preprocess.py index ef5905d130..9796404c9f 100644 --- a/src/local/butler/uworker_preprocess.py +++ b/src/local/butler/preprocess.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""uworker_preprocess.py runs the preprocess step of a fuzz task locally.""" +"""preprocess.py runs the preprocess step of a fuzz task locally.""" import os import sys @@ -43,21 +43,19 @@ def _get_fuzzer_environment(fuzzer_name, job_name): fuzzer = data_types.Fuzzer.query(data_types.Fuzzer.name == fuzzer_name).get() if not fuzzer: - print(f"Error: Fuzzer {fuzzer_name} not found in Datastore.") - sys.exit(1) - + return {} additional_default_variables = '' additional_variables_for_job = '' - if fuzzer.additional_environment_string: - for env_variable_line in fuzzer.additional_environment_string.splitlines(): - if '=' in env_variable_line and ':' in env_variable_line.split('=', 1)[0]: - fuzzer_job_name, environment_definition = env_variable_line.split(':', 1) + if hasattr(fuzzer, 'additional_environment_string') and fuzzer.additional_environment_string: + for line in fuzzer.additional_environment_string.splitlines(): + if '=' in line and ':' in line.split('=', 1)[0]: + fuzzer_job_name, environment_definition = line.split(':', 1) if fuzzer_job_name == job_name: additional_variables_for_job += '\n%s' % environment_definition continue - additional_default_variables += '\n%s' % env_variable + additional_default_variables += '\n%s' % line env_string = additional_default_variables + additional_variables_for_job return environment.parse_environment_definition(env_string) @@ -95,7 +93,7 @@ def _early_setup(args): def execute(args): - """Executes the uworker_preprocess command.""" + """Executes the preprocess command.""" _early_setup(args) print(f'Running preprocess for fuzzer: {args.fuzzer}, job: {args.job}') From 51348d4c7ef6e5db8ffbbec2c47fc96f33fe4dd1 Mon Sep 17 00:00:00 2001 From: Ivan Barba Date: Tue, 5 May 2026 04:09:25 +0000 Subject: [PATCH 4/4] Fixes Linter errors --- butler.py | 9 +++---- src/local/butler/preprocess.py | 44 ++++++++++++++++++---------------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/butler.py b/butler.py index ccb345af84..427b0df1c1 100644 --- a/butler.py +++ b/butler.py @@ -357,12 +357,9 @@ def _add_schedule_tworker_fuzz_subparser(toplevel_subparsers): def _add_preprocess_subparser(toplevel_subparsers): """Adds a parser for the `preprocess` command.""" parser = toplevel_subparsers.add_parser( - 'preprocess', - help='Run the preprocess step of a fuzz task locally.') - parser.add_argument( - '--fuzzer', required=True, help='The name of the fuzzer.') - parser.add_argument( - '--job', required=True, help='The job name.') + 'preprocess', help='Run the preprocess step of a fuzz task locally.') + parser.add_argument('--fuzzer', required=True, help='The name of the fuzzer.') + parser.add_argument('--job', required=True, help='The job name.') parser.add_argument( '-c', '--config-dir', required=True, help='Path to application config.') diff --git a/src/local/butler/preprocess.py b/src/local/butler/preprocess.py index 9796404c9f..e3b8180893 100644 --- a/src/local/butler/preprocess.py +++ b/src/local/butler/preprocess.py @@ -22,8 +22,8 @@ from clusterfuzz._internal.config import local_config from clusterfuzz._internal.datastore import data_types from clusterfuzz._internal.datastore import ndb_init -from clusterfuzz._internal.system import environment from clusterfuzz._internal.metrics import logs +from clusterfuzz._internal.system import environment def _get_job_environment(job_name): @@ -31,24 +31,24 @@ def _get_job_environment(job_name): job = data_types.Job.query(data_types.Job.name == job_name).get() if job: return job.get_environment() - else: - print(f"Error: Job {job_name} not found in Datastore.") - sys.exit(1) + raise RuntimeError(f'Error: Job {job_name} not found in Datastore.') def _get_fuzzer_environment(fuzzer_name, job_name): """Fetches fuzzer entity and returns its additional environment variables.""" if environment.is_engine_fuzzer_job(job_name): return {} - + fuzzer = data_types.Fuzzer.query(data_types.Fuzzer.name == fuzzer_name).get() if not fuzzer: - return {} - + raise RuntimeError(f'Error: Fuzzer {fuzzer_name} not found in Datastore.') + additional_default_variables = '' additional_variables_for_job = '' - - if hasattr(fuzzer, 'additional_environment_string') and fuzzer.additional_environment_string: + + if hasattr( + fuzzer, + 'additional_environment_string') and fuzzer.additional_environment_string: for line in fuzzer.additional_environment_string.splitlines(): if '=' in line and ':' in line.split('=', 1)[0]: fuzzer_job_name, environment_definition = line.split(':', 1) @@ -56,7 +56,7 @@ def _get_fuzzer_environment(fuzzer_name, job_name): additional_variables_for_job += '\n%s' % environment_definition continue additional_default_variables += '\n%s' % line - + env_string = additional_default_variables + additional_variables_for_job return environment.parse_environment_definition(env_string) @@ -65,18 +65,18 @@ def _get_uworker_env(args): """Prepares the complete environment variables for the payload.""" uworker_env = _get_job_environment(args.job) uworker_env.update(_get_fuzzer_environment(args.fuzzer, args.job)) - + # Replicate what process_command_impl does in a real tworker uworker_env['TASK_NAME'] = 'fuzz' uworker_env['TASK_ARGUMENT'] = args.fuzzer uworker_env['JOB_NAME'] = args.job - + # Add logging metadata to be carried over to uworker_main uworker_env['CF_TASK_NAME'] = 'fuzz' uworker_env['CF_TASK_JOB_NAME'] = args.job uworker_env['CF_TASK_ARGUMENT'] = args.fuzzer uworker_env['CF_TASK_ID'] = str(uuid.uuid4()) - + return uworker_env @@ -85,8 +85,9 @@ def _early_setup(args): sys.path.insert(0, os.path.abspath(os.path.join('src', 'appengine'))) sys.path.insert( 0, os.path.abspath(os.path.join('src', 'appengine', 'third_party'))) - - environment.set_value('CONFIG_DIR_OVERRIDE', os.path.abspath(os.path.expanduser(args.config_dir))) + + environment.set_value('CONFIG_DIR_OVERRIDE', + os.path.abspath(os.path.expanduser(args.config_dir))) environment.set_value('LOG_TO_CONSOLE', True) local_config.ProjectConfig().set_environment() logs.configure('run_bot') @@ -95,19 +96,20 @@ def _early_setup(args): def execute(args): """Executes the preprocess command.""" _early_setup(args) - + print(f'Running preprocess for fuzzer: {args.fuzzer}, job: {args.job}') - + with ndb_init.context(): uworker_env = _get_uworker_env(args) - + # tworker_preprocess expects: (module, task_argument, job_type, uworker_env) # For fuzz task, task_argument is fuzzer_name. - result = utasks.tworker_preprocess(fuzz_task, args.fuzzer, args.job, uworker_env) - + result = utasks.tworker_preprocess(fuzz_task, args.fuzzer, args.job, + uworker_env) + if result: download_url, _ = result - print(f'\nPreprocess successful!') + print('\nPreprocess successful!') print(f'Input Download URL: {download_url}') else: print('\nPreprocess failed or returned no result.')