From c533b8ef716d2fdbea57f01e757a37766426c73e Mon Sep 17 00:00:00 2001
From: Dylan Jew <dylanj@google.com>
Date: Thu, 30 Apr 2026 17:17:22 -0400
Subject: [PATCH] Add script to update all fuzzer stats tables

---
 .../scripts/update_bigquery_stats_schema.py   | 88 +++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 src/local/butler/scripts/update_bigquery_stats_schema.py

diff --git a/src/local/butler/scripts/update_bigquery_stats_schema.py b/src/local/butler/scripts/update_bigquery_stats_schema.py
new file mode 100644
index 0000000000..de731826df
--- /dev/null
+++ b/src/local/butler/scripts/update_bigquery_stats_schema.py
@@ -0,0 +1,88 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Script to migrate BigQuery JobRun table schemas for old fuzzers."""
+
+from clusterfuzz._internal.base import utils
+from clusterfuzz._internal.datastore import data_types
+from clusterfuzz._internal.datastore import ndb_utils
+from clusterfuzz._internal.google_cloud_utils import big_query
+from clusterfuzz._internal.metrics import fuzzer_stats
+
+_JOB_RUN_TABLE_ID = 'JobRun'
+
+
+def _get_table(bigquery_client, project_id, dataset_id):
+  """Return the dataset's JobRun table resource, or None if the get raises."""
+  try:
+    request = bigquery_client.tables().get(
+        projectId=project_id, datasetId=dataset_id, tableId=_JOB_RUN_TABLE_ID)
+    return request.execute()
+  except Exception as e:
+    # Best effort: report the failure and let the caller skip this dataset.
+    print(f'Failed getting table details of {dataset_id}: {e}')
+    return None
+
+
+def execute(args):
+  """Migrate historical BigQuery JobRun tables to the current JobRun schema.
+
+  Appends the fuzzer_stats.JobRun.SCHEMA fields missing from each non-builtin
+  fuzzer's JobRun table; only prints them unless args.non_dry_run is set.
+  """
+  print('Starting BigQuery stats tables schema migration.')
+
+  bigquery_client = big_query.get_api_client()
+  project_id = utils.get_application_id()
+  # The target schema is the same for every fuzzer, so look it up only once.
+  expected_fields = fuzzer_stats.JobRun.SCHEMA['fields']
+
+  fuzzers = list(
+      data_types.Fuzzer.query(ndb_utils.is_false(data_types.Fuzzer.builtin)))
+
+  count = 0
+  for fuzzer in fuzzers:
+    dataset_id = fuzzer_stats.dataset_name(fuzzer.name)
+    # _get_table reports lookup failures itself, so just skip this fuzzer.
+    table = _get_table(bigquery_client, project_id, dataset_id)
+    if not table:
+      continue
+
+    fields = table.get('schema', {}).get('fields', [])
+    existing_field_names = {f['name'] for f in fields}
+    missing_fields = [
+        f for f in expected_fields if f['name'] not in existing_field_names
+    ]
+    if not missing_fields:
+      continue
+
+    if not args.non_dry_run:
+      missing_field_names = [f['name'] for f in missing_fields]
+      print(f'DRY RUN: Would append {missing_field_names} to {fuzzer.name}.')
+      continue
+
+    # Existing fields stay first; the missing ones are appended after them.
+    body = {'schema': {'fields': list(fields) + missing_fields}}
+    try:
+      bigquery_client.tables().patch(  # pylint: disable=no-member
+          projectId=project_id,
+          datasetId=dataset_id,
+          tableId=_JOB_RUN_TABLE_ID,
+          body=body).execute()
+      print(f'Successfully updated schema for fuzzer: {fuzzer.name}')
+      count += 1
+    except Exception as e:
+      # Log error and continue to update the remaining fuzzers
+      print(f'Error updating schema for {fuzzer.name}: {e}')
+
+  print(f'BigQuery schema migration complete. Tables updated: {count}.')