From 6c76f1e862790858297ff9b1debc1a38756b1913 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 13 Apr 2026 17:56:16 -0700 Subject: [PATCH 1/3] backfill: reject rev-list arguments that do not make sense Some rev-list options accepted by setup_revisions() are silently ignored or actively counterproductive when used with 'git backfill', because the path-walk API has its own tree-walking logic that bypasses the mechanisms these options rely on: * -S/-G (pickaxe) and --diff-filter work by computing per-commit diffs in get_revision_1() and filtering commits whose diffs don't match. Since backfill's goal is to download all blobs reachable from commits in the range, filtering out commits based on diff content would silently skip blobs -- the opposite of what users want. * --follow disables path pruning (revs->prune) and only makes sense for tracking a single file through renames in log output. It has no useful interaction with backfill. * -L (line-log) computes line-level diffs to track the evolution of a function or line range. Like pickaxe, it filters commits based on diff content, which would cause blobs to be silently skipped. * --diff-merges controls how merge commit diffs are displayed. The path-walk API walks trees directly and never computes per-commit diffs, so this option would be silently ignored. * --filter (object filtering, e.g. --filter=blob:none) is used by the list-objects traversal but is completely ignored by the path-walk API, so it would silently do nothing. Rather than letting users think these options are being honored, reject them with a clear error message. Signed-off-by: Elijah Newren --- builtin/backfill.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/builtin/backfill.c b/builtin/backfill.c index d794dd842f65ce..a9edddcb7edfa3 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -78,6 +78,28 @@ static int fill_missing_blobs(const char *path UNUSED, return 0; } +static void reject_unsupported_rev_list_options(struct rev_info *revs) +{ + if (revs->diffopt.pickaxe) + die(_("'%s' cannot be used with 'git backfill'"), + (revs->diffopt.pickaxe_opts & DIFF_PICKAXE_REGEX) ? "-G" : "-S"); + if (revs->diffopt.filter || revs->diffopt.filter_not) + die(_("'%s' cannot be used with 'git backfill'"), + "--diff-filter"); + if (revs->diffopt.flags.follow_renames) + die(_("'%s' cannot be used with 'git backfill'"), + "--follow"); + if (revs->line_level_traverse) + die(_("'%s' cannot be used with 'git backfill'"), + "-L"); + if (revs->explicit_diff_merges) + die(_("'%s' cannot be used with 'git backfill'"), + "--diff-merges"); + if (revs->filter.choice) + die(_("'%s' cannot be used with 'git backfill'"), + "--filter"); +} + static int do_backfill(struct backfill_context *ctx) { struct path_walk_info info = PATH_WALK_INFO_INIT; @@ -144,6 +166,7 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit if (argc > 1) die(_("unrecognized argument: %s"), argv[1]); + reject_unsupported_rev_list_options(&ctx.revs); repo_config(repo, git_default_config, NULL); From 173831ec92ea712a72f790f3a8eea6643ef7488b Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 13 Apr 2026 17:56:16 -0700 Subject: [PATCH 2/3] backfill: document acceptance of revision-range in more standard manner 302aff09223f (backfill: accept revision arguments, 2026-03-26) added support for passing revision arguments to 'git backfill' but documented them only with a prose sentence: You may also specify the commit limiting options from git-rev-list(1). No other command that accepts revision arguments documents them this way. Commands like log, shortlog, and replay define a formal entry and include rev-list-options.adoc. Commands like bundle, fast-export, and filter-branch, which pass arguments through to the revision machinery without including the full options file, still define a formal entry explaining what is accepted. Add a formal entry in the synopsis and OPTIONS section, following the convention used by other commands, and mention that commit-limiting options from git-rev-list(1) are also accepted. Signed-off-by: Elijah Newren --- Documentation/git-backfill.adoc | 15 ++++++++++++--- builtin/backfill.c | 2 +- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Documentation/git-backfill.adoc b/Documentation/git-backfill.adoc index 246ab417c24a10..bf26d7694f402d 100644 --- a/Documentation/git-backfill.adoc +++ b/Documentation/git-backfill.adoc @@ -9,7 +9,7 @@ git-backfill - Download missing objects in a partial clone SYNOPSIS -------- [synopsis] -git backfill [--min-batch-size=] [--[no-]sparse] +git backfill [--min-batch-size=] [--[no-]sparse] [] DESCRIPTION ----------- @@ -43,7 +43,7 @@ smaller network calls than downloading the entire repository at clone time. By default, `git backfill` downloads all blobs reachable from the `HEAD` -commit. This set can be restricted or expanded using various options. +commit. This set can be restricted or expanded using various options below. THIS COMMAND IS EXPERIMENTAL. ITS BEHAVIOR MAY CHANGE IN THE FUTURE. @@ -63,7 +63,16 @@ OPTIONS current sparse-checkout. If the sparse-checkout feature is enabled, then `--sparse` is assumed and can be disabled with `--no-sparse`. -You may also specify the commit limiting options from linkgit:git-rev-list[1]. +``:: + Backfill only blobs reachable from commits in the specified + revision range. When no __ is specified, it + defaults to `HEAD` (i.e. the whole history leading to the + current commit). For a complete list of ways to spell + __, see the "Specifying Ranges" section of + linkgit:gitrevisions[7]. ++ +You may also use commit-limiting options understood by +linkgit:git-rev-list[1] such as `--first-parent`, `--since`, or pathspecs. SEE ALSO -------- diff --git a/builtin/backfill.c b/builtin/backfill.c index a9edddcb7edfa3..e934d360fdd9d0 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -26,7 +26,7 @@ #include "path-walk.h" static const char * const builtin_backfill_usage[] = { - N_("git backfill [--min-batch-size=] [--[no-]sparse]"), + N_("git backfill [--min-batch-size=] [--[no-]sparse] []"), NULL }; From 607ed38e2a8ae94266b4a3d51610e604cca8df4f Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 13 Apr 2026 17:56:16 -0700 Subject: [PATCH 3/3] backfill: default to grabbing edge blobs too Commit 302aff09223f (backfill: accept revision arguments, 2026-03-26) added support for accepting revision arguments to backfill. This allows users to do things like git backfill --remotes ^v2.3.0 and then run many commands without triggering on-demand downloads of blobs. However, if they have topics based on v2.3.0, they will likely still trigger on-demand downloads. Consider, for example, the command git log -p v2.3.0..topic This would still trigger on-demand blob loadings after the backfill command above, because the commit(s) with A as a parent will need to diff against the blobs in A. In fact, multiple commands need blobs from the lower boundary of the revision range: * git log -p A..B # After backfill A..B * git replay --onto TARGET A..B # After backfill TARGET^! A..B * git checkout A && git merge B # After backfill A...B Add an extra --[no-]include-edges flag to allow grabbing blobs from edge commits. Since the point of backfill is to prevent on-demand blob loading and these are common commands, default to --include-edges. Signed-off-by: Elijah Newren --- Documentation/git-backfill.adoc | 9 ++- builtin/backfill.c | 8 ++- t/t5620-backfill.sh | 110 ++++++++++++++++++++++++++++++-- 3 files changed, 119 insertions(+), 8 deletions(-) diff --git a/Documentation/git-backfill.adoc b/Documentation/git-backfill.adoc index bf26d7694f402d..c0a3b80615e034 100644 --- a/Documentation/git-backfill.adoc +++ b/Documentation/git-backfill.adoc @@ -9,7 +9,7 @@ git-backfill - Download missing objects in a partial clone SYNOPSIS -------- [synopsis] -git backfill [--min-batch-size=] [--[no-]sparse] [] +git backfill [--min-batch-size=] [--[no-]sparse] [--[no-]include-edges] [] DESCRIPTION ----------- @@ -63,6 +63,13 @@ OPTIONS current sparse-checkout. If the sparse-checkout feature is enabled, then `--sparse` is assumed and can be disabled with `--no-sparse`. +`--include-edges`:: +`--no-include-edges`:: + Include blobs from boundary commits in the backfill. Useful in + preparation for commands like `git log -p A..B` or `git replay + --onto TARGET A..B`, where A..B normally excludes A but you need + the blobs from A as well. `--include-edges` is the default. + ``:: Backfill only blobs reachable from commits in the specified revision range. When no __ is specified, it diff --git a/builtin/backfill.c b/builtin/backfill.c index e934d360fdd9d0..7ffab2ea74f5cc 100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@ -26,7 +26,7 @@ #include "path-walk.h" static const char * const builtin_backfill_usage[] = { - N_("git backfill [--min-batch-size=] [--[no-]sparse] []"), + N_("git backfill [--min-batch-size=] [--[no-]sparse] [--[no-]include-edges] []"), NULL }; @@ -35,6 +35,7 @@ struct backfill_context { struct oid_array current_batch; size_t min_batch_size; int sparse; + int include_edges; struct rev_info revs; }; @@ -116,6 +117,8 @@ static int do_backfill(struct backfill_context *ctx) /* Walk from HEAD if otherwise unspecified. */ if (!ctx->revs.pending.nr) add_head_to_pending(&ctx->revs); + if (ctx->include_edges) + ctx->revs.edge_hint = 1; info.blobs = 1; info.tags = info.commits = info.trees = 0; @@ -143,12 +146,15 @@ int cmd_backfill(int argc, const char **argv, const char *prefix, struct reposit .min_batch_size = 50000, .sparse = -1, .revs = REV_INFO_INIT, + .include_edges = 1, }; struct option options[] = { OPT_UNSIGNED(0, "min-batch-size", &ctx.min_batch_size, N_("Minimum number of objects to request at a time")), OPT_BOOL(0, "sparse", &ctx.sparse, N_("Restrict the missing objects to the current sparse-checkout")), + OPT_BOOL(0, "include-edges", &ctx.include_edges, + N_("Include blobs from boundary commits in the backfill")), OPT_END(), }; struct repo_config_values *cfg = repo_config_values(the_repository); diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index f3b5e39493677b..94f35ce1901671 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -257,11 +257,12 @@ test_expect_success 'backfill with revision range' ' git -C backfill-revs rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - git -C backfill-revs backfill HEAD~2..HEAD && + GIT_TRACE2_EVENT="$(pwd)/backfill-trace" git -C backfill-revs backfill HEAD~2..HEAD && - # 30 objects downloaded. + # 36 objects downloaded, 12 still missing + test_trace2_data promisor fetch_count 36 missing && - test_line_count = 18 missing + test_line_count = 12 missing ' test_expect_success 'backfill with revisions over stdin' ' @@ -279,11 +280,12 @@ test_expect_success 'backfill with revisions over stdin' ' ^HEAD~2 EOF - git -C backfill-revs backfill --stdin missing && - test_line_count = 18 missing + test_line_count = 12 missing ' test_expect_success 'backfill with prefix pathspec' ' @@ -398,6 +400,102 @@ test_expect_success 'backfill with --since' ' test_line_count = 6 missing ' +test_expect_success 'backfill range with include-edges enables fetch-free git-log' ' + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-log && + + # Backfill the range with default include edges. + git -C backfill-log backfill HEAD~2..HEAD && + + # git log -p needs edge blobs for the "before" side of + # diffs. With edge inclusion, all needed blobs are local. + GIT_TRACE2_EVENT="$(pwd)/log-trace" git \ + -C backfill-log log -p HEAD~2..HEAD >log-output && + + # No promisor fetches should have been needed. + ! grep "fetch_count" log-trace +' + +test_expect_success 'backfill range without include edges causes on-demand fetches in git-log' ' + git clone --no-checkout --filter=blob:none \ + --single-branch --branch=main \ + "file://$(pwd)/srv.bare" backfill-log-no-bdy && + + # Backfill WITHOUT include edges -- file.3 v1 blobs are missing. + git -C backfill-log-no-bdy backfill --no-include-edges HEAD~2..HEAD && + + # git log -p HEAD~2..HEAD computes diff of commit 7 against + # commit 6. It needs file.3 v1 (the "before" side), which was + # not backfilled. This triggers on-demand promisor fetches. + GIT_TRACE2_EVENT="$(pwd)/log-no-bdy-trace" git \ + -C backfill-log-no-bdy log -p HEAD~2..HEAD >log-output && + + grep "fetch_count" log-no-bdy-trace +' + +test_expect_success 'backfill range enables fetch-free replay' ' + # Create a repo with a branch to replay. + git init replay-src && + ( + cd replay-src && + git config uploadpack.allowfilter 1 && + git config uploadpack.allowanysha1inwant 1 && + test_commit base && + git checkout -b topic && + test_commit topic-change && + git checkout main && + test_commit main-change + ) && + git clone --bare --filter=blob:none \ + "file://$(pwd)/replay-src" replay-dest.git && + + # Backfill the replay range: --onto main, replaying topic~1..topic. + # For replay, we need TARGET^! plus the range. + main_oid=$(git -C replay-dest.git rev-parse main) && + topic_oid=$(git -C replay-dest.git rev-parse topic) && + base_oid=$(git -C replay-dest.git rev-parse topic~1) && + git -C replay-dest.git backfill \ + "$main_oid^!" "$base_oid..$topic_oid" && + + # Now replay should complete without any promisor fetches. + GIT_TRACE2_EVENT="$(pwd)/replay-trace" git -C replay-dest.git \ + replay --onto main topic~1..topic >replay-out && + + ! grep "fetch_count" replay-trace +' + +test_expect_success 'backfill enables fetch-free merge' ' + # Create a repo with two branches to merge. + git init merge-src && + ( + cd merge-src && + git config uploadpack.allowfilter 1 && + git config uploadpack.allowanysha1inwant 1 && + test_commit merge-base && + git checkout -b side && + test_commit side-change && + git checkout main && + test_commit main-side-change + ) && + git clone --filter=blob:none \ + "file://$(pwd)/merge-src" merge-dest && + + # The clone checked out main, fetching its blobs. + # Backfill the three endpoint commits needed for merge. + main_oid=$(git -C merge-dest rev-parse origin/main) && + side_oid=$(git -C merge-dest rev-parse origin/side) && + mbase=$(git -C merge-dest merge-base origin/main origin/side) && + git -C merge-dest backfill --no-include-edges \ + "$main_oid^!" "$side_oid^!" "$mbase^!" && + + # Merge should complete without promisor fetches. + GIT_TRACE2_EVENT="$(pwd)/merge-trace" git -C merge-dest \ + merge origin/side -m "test merge" && + + ! grep "fetch_count" merge-trace +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd