Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,16 @@ def _add_schedule_tworker_fuzz_subparser(toplevel_subparsers):
help='Script specific arguments (FUZZER_NAME JOB_NAME)')


def _add_preprocess_subparser(toplevel_subparsers):
"""Adds a parser for the `preprocess` command."""
parser = toplevel_subparsers.add_parser(
'preprocess', help='Run the preprocess step of a fuzz task locally.')
parser.add_argument('--fuzzer', required=True, help='The name of the fuzzer.')
parser.add_argument('--job', required=True, help='The job name.')
parser.add_argument(
'-c', '--config-dir', required=True, help='Path to application config.')


def _add_reproduce_subparser(toplevel_subparsers):
"""Adds a parser for the `reproduce` command."""
parser = toplevel_subparsers.add_parser(
Expand Down Expand Up @@ -491,6 +501,7 @@ def main():
_add_weights_subparser(subparsers)
_add_reproduce_subparser(subparsers)
_add_schedule_tworker_fuzz_subparser(subparsers)
_add_preprocess_subparser(subparsers)
args = parser.parse_args()
if not args.command:
parser.print_help()
Expand Down
115 changes: 115 additions & 0 deletions src/local/butler/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""preprocess.py runs the preprocess step of a fuzz task locally."""

import os
import sys
import uuid

from clusterfuzz._internal.bot.tasks import utasks
from clusterfuzz._internal.bot.tasks.utasks import fuzz_task
from clusterfuzz._internal.config import local_config
from clusterfuzz._internal.datastore import data_types
from clusterfuzz._internal.datastore import ndb_init
from clusterfuzz._internal.metrics import logs
from clusterfuzz._internal.system import environment


def _get_job_environment(job_name):
  """Looks up the Job entity named |job_name| and returns its environment.

  Args:
    job_name: Value matched against `data_types.Job.name`.

  Returns:
    Whatever the entity's `get_environment()` returns.

  Raises:
    RuntimeError: If the query yields no matching Job entity.
  """
  job_entity = data_types.Job.query(data_types.Job.name == job_name).get()
  if not job_entity:
    raise RuntimeError(f'Error: Job {job_name} not found in Datastore.')
  return job_entity.get_environment()


def _get_fuzzer_environment(fuzzer_name, job_name):
  """Returns the fuzzer's additional environment variables for |job_name|.

  Engine fuzzer jobs yield an empty dict without any Datastore lookup.
  Otherwise the fuzzer's `additional_environment_string` is split line by line
  into default definitions and definitions scoped to a job (written as
  `job_name:VAR = VALUE`). Definitions scoped to |job_name| are placed after
  the defaults in the string handed to
  `environment.parse_environment_definition`; definitions scoped to any other
  job are dropped.

  Args:
    fuzzer_name: Value matched against `data_types.Fuzzer.name`.
    job_name: Job whose scoped definitions should be kept.

  Raises:
    RuntimeError: If no Fuzzer entity named |fuzzer_name| is found.
  """
  if environment.is_engine_fuzzer_job(job_name):
    return {}

  fuzzer = data_types.Fuzzer.query(data_types.Fuzzer.name == fuzzer_name).get()
  if not fuzzer:
    raise RuntimeError(f'Error: Fuzzer {fuzzer_name} not found in Datastore.')

  default_definitions = []
  job_definitions = []

  # A missing or empty attribute simply contributes no lines.
  raw_definitions = getattr(fuzzer, 'additional_environment_string', None)
  if raw_definitions:
    for line in raw_definitions.splitlines():
      # A ':' that appears before the first '=' marks a job-scoped definition.
      name_part = line.split('=', 1)[0]
      if not ('=' in line and ':' in name_part):
        default_definitions.append(line)
        continue

      scoped_job, definition = line.split(':', 1)
      if scoped_job == job_name:
        job_definitions.append(definition)

  # Each kept definition is emitted on its own line, defaults first.
  env_string = ''.join(
      '\n%s' % definition
      for definition in default_definitions + job_definitions)
  return environment.parse_environment_definition(env_string)


def _get_uworker_env(args):
  """Builds the environment mapping passed along with the preprocess payload.

  Starts from the job's environment, applies the fuzzer's additional
  variables on top of it, then sets the task-identifying and logging keys.

  Args:
    args: Parsed command-line arguments; `args.job` and `args.fuzzer` are read.

  Returns:
    The mapping obtained from `_get_job_environment`, updated in place.
  """
  uworker_env = _get_job_environment(args.job)
  uworker_env.update(_get_fuzzer_environment(args.fuzzer, args.job))

  task_values = {
      # Replicate what process_command_impl does in a real tworker.
      # NOTE(review): a reviewer asked whether process_command_impl() could be
      # used here instead of replicating it.
      'TASK_NAME': 'fuzz',
      'TASK_ARGUMENT': args.fuzzer,
      'JOB_NAME': args.job,
      # Logging metadata to be carried over to uworker_main.
      'CF_TASK_NAME': 'fuzz',
      'CF_TASK_JOB_NAME': args.job,
      'CF_TASK_ARGUMENT': args.fuzzer,
      # A fresh random id is generated on every call.
      'CF_TASK_ID': str(uuid.uuid4()),
  }
  for key, value in task_values.items():
    uworker_env[key] = value

  return uworker_env


def _early_setup(args):
  """Prepares import paths, the config override and console logging.

  Args:
    args: Parsed command-line arguments; only `args.config_dir` is read.
  """
  # NOTE(review): a reviewer suggested naming this function simply `setup`.

  # Inserted at the front one after the other, so the third_party directory
  # ends up ahead of src/appengine on sys.path. Both paths are resolved
  # against the current working directory.
  appengine_dir = os.path.join('src', 'appengine')
  for relative_dir in (appengine_dir,
                       os.path.join(appengine_dir, 'third_party')):
    sys.path.insert(0, os.path.abspath(relative_dir))

  config_dir = os.path.abspath(os.path.expanduser(args.config_dir))
  environment.set_value('CONFIG_DIR_OVERRIDE', config_dir)
  environment.set_value('LOG_TO_CONSOLE', True)

  local_config.ProjectConfig().set_environment()
  logs.configure('run_bot')


def execute(args):
  """Runs the `preprocess` command and prints the outcome.

  Args:
    args: Parsed command-line arguments; `args.fuzzer`, `args.job` and
      `args.config_dir` are read (the latter during setup).
  """
  _early_setup(args)

  fuzzer_name = args.fuzzer
  job_name = args.job
  print(f'Running preprocess for fuzzer: {fuzzer_name}, job: {job_name}')

  with ndb_init.context():
    uworker_env = _get_uworker_env(args)

    # tworker_preprocess takes (module, task_argument, job_type, uworker_env);
    # for the fuzz task the task argument is the fuzzer name.
    result = utasks.tworker_preprocess(fuzz_task, fuzzer_name, job_name,
                                       uworker_env)

    if not result:
      print('\nPreprocess failed or returned no result.')
      return

    # A truthy result is unpacked as a pair; only its first item is reported.
    download_url, _ = result
    print('\nPreprocess successful!')
    print(f'Input Download URL: {download_url}')
Loading