diff --git a/cache-tree.c b/cache-tree.c index 7881b42aa24c80..f11844fe72020e 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -192,22 +192,62 @@ static int verify_cache(struct index_state *istate, int flags) for (i = 0; i + 1 < istate->cache_nr; i++) { /* path/file always comes after path because of the way * the cache is sorted. Also path can appear only once, - * which means conflicting one would immediately follow. + * so path/file is likely the immediately following path + * but might be separated if there is e.g. a + * path-internal/... file. */ const struct cache_entry *this_ce = istate->cache[i]; const struct cache_entry *next_ce = istate->cache[i + 1]; const char *this_name = this_ce->name; const char *next_name = next_ce->name; int this_len = ce_namelen(this_ce); + const char *conflict_name = NULL; + if (this_len < ce_namelen(next_ce) && - next_name[this_len] == '/' && + next_name[this_len] <= '/' && strncmp(this_name, next_name, this_len) == 0) { + if (next_name[this_len] == '/') { + conflict_name = next_name; + } else if (next_name[this_len] < '/') { + /* + * The immediately next entry shares our + * prefix but sorts before "path/" (e.g., + * "path-internal" between "path" and + * "path/file", since '-' (0x2D) < '/' + * (0x2F)). Binary search to find where + * "path/" would be and check for a D/F + * conflict there. + */ + struct cache_entry *other; + struct strbuf probe = STRBUF_INIT; + int pos; + + strbuf_add(&probe, this_name, this_len); + strbuf_addch(&probe, '/'); + pos = index_name_pos_sparse(istate, + probe.buf, + probe.len); + strbuf_release(&probe); + + if (pos < 0) + pos = -pos - 1; + if (pos >= (int)istate->cache_nr) + continue; + other = istate->cache[pos]; + if (ce_namelen(other) > this_len && + other->name[this_len] == '/' && + !strncmp(this_name, other->name, this_len)) + conflict_name = other->name; + } + } + + if (conflict_name) { if (10 < ++funny) { fprintf(stderr, "...\n"); break; } fprintf(stderr, "You have both %s and %s\n", - this_name, next_name); + this_name, conflict_name); } } if (funny) diff --git a/merge-ort.c b/merge-ort.c index 00923ce3cd749b..be0829bbb781ef 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -728,6 +728,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, strintmap_clear_func(&renames->deferred[i].possible_trivial_merges); strset_clear_func(&renames->deferred[i].target_dirs); renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */ + free(renames->pairs[i].queue); + diff_queue_init(&renames->pairs[i]); } renames->cached_pairs_valid_side = 0; renames->dir_rename_mask = 0; @@ -1008,32 +1010,34 @@ static int traverse_trees_wrapper(struct index_state *istate, info->traverse_path = renames->callback_data_traverse_path; info->fn = old_fn; for (i = old_offset; i < renames->callback_data_nr; ++i) { - info->fn(n, - renames->callback_data[i].mask, - renames->callback_data[i].dirmask, - renames->callback_data[i].names, - info); + ret = info->fn(n, + renames->callback_data[i].mask, + renames->callback_data[i].dirmask, + renames->callback_data[i].names, + info); + if (ret < 0) + break; } renames->callback_data_nr = old_offset; free(renames->callback_data_traverse_path); renames->callback_data_traverse_path = old_callback_data_traverse_path; info->traverse_path = NULL; - return 0; + return ret < 0 ? ret : 0; } -static void setup_path_info(struct merge_options *opt, - struct string_list_item *result, - const char *current_dir_name, - int current_dir_name_len, - char *fullpath, /* we'll take over ownership */ - struct name_entry *names, - struct name_entry *merged_version, - unsigned is_null, /* boolean */ - unsigned df_conflict, /* boolean */ - unsigned filemask, - unsigned dirmask, - int resolved /* boolean */) +static int setup_path_info(struct merge_options *opt, + struct string_list_item *result, + const char *current_dir_name, + int current_dir_name_len, + char *fullpath, /* we'll take over ownership */ + struct name_entry *names, + struct name_entry *merged_version, + unsigned is_null, /* boolean */ + unsigned df_conflict, /* boolean */ + unsigned filemask, + unsigned dirmask, + int resolved /* boolean */) { /* result->util is void*, so mi is a convenience typed variable */ struct merged_info *mi; @@ -1077,9 +1081,11 @@ static void setup_path_info(struct merge_options *opt, */ mi->is_null = 1; } - strmap_put(&opt->priv->paths, fullpath, mi); + if (strmap_put(&opt->priv->paths, fullpath, mi)) + return error(_("tree has duplicate entries for '%s'"), fullpath); result->string = fullpath; result->util = mi; + return 0; } static void add_pair(struct merge_options *opt, @@ -1346,9 +1352,10 @@ static int collect_merge_info_callback(int n, */ if (side1_matches_mbase && side2_matches_mbase) { /* mbase, side1, & side2 all match; use mbase as resolution */ - setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, - names, names+0, mbase_null, 0 /* df_conflict */, - filemask, dirmask, 1 /* resolved */); + if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+0, mbase_null, 0 /* df_conflict */, + filemask, dirmask, 1 /* resolved */)) + return -1; /* Quit traversing */ return mask; } @@ -1360,9 +1367,10 @@ static int collect_merge_info_callback(int n, */ if (sides_match && filemask == 0x07) { /* use side1 (== side2) version as resolution */ - setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, - names, names+1, side1_null, 0, - filemask, dirmask, 1); + if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+1, side1_null, 0, + filemask, dirmask, 1)) + return -1; /* Quit traversing */ return mask; } @@ -1374,18 +1382,20 @@ static int collect_merge_info_callback(int n, */ if (side1_matches_mbase && filemask == 0x07) { /* use side2 version as resolution */ - setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, - names, names+2, side2_null, 0, - filemask, dirmask, 1); + if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+2, side2_null, 0, + filemask, dirmask, 1)) + return -1; /* Quit traversing */ return mask; } /* Similar to above but swapping sides 1 and 2 */ if (side2_matches_mbase && filemask == 0x07) { /* use side1 version as resolution */ - setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, - names, names+1, side1_null, 0, - filemask, dirmask, 1); + if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+1, side1_null, 0, + filemask, dirmask, 1)) + return -1; /* Quit traversing */ return mask; } @@ -1409,8 +1419,9 @@ static int collect_merge_info_callback(int n, * unconflict some more cases, but that comes later so all we can * do now is record the different non-null file hashes.) */ - setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, - names, NULL, 0, df_conflict, filemask, dirmask, 0); + if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, NULL, 0, df_conflict, filemask, dirmask, 0)) + return -1; /* Quit traversing */ ci = pi.util; VERIFY_CI(ci); @@ -1738,7 +1749,6 @@ static int collect_merge_info(struct merge_options *opt, setup_traverse_info(&info, opt->priv->toplevel_dir); info.fn = collect_merge_info_callback; info.data = opt; - info.show_all_errors = 1; if (repo_parse_tree(opt->repo, merge_base) < 0 || repo_parse_tree(opt->repo, side1) < 0 || diff --git a/t/meson.build b/t/meson.build index 7528e5cda5fef0..362177999bd342 100644 --- a/t/meson.build +++ b/t/meson.build @@ -124,6 +124,7 @@ integration_tests = [ 't0090-cache-tree.sh', 't0091-bugreport.sh', 't0092-diagnose.sh', + 't0093-verify-cache-df-gap.sh', 't0095-bloom.sh', 't0100-previous.sh', 't0101-at-syntax.sh', diff --git a/t/t0093-direct-index-write.pl b/t/t0093-direct-index-write.pl new file mode 100644 index 00000000000000..2881a3ebb21dcd --- /dev/null +++ b/t/t0093-direct-index-write.pl @@ -0,0 +1,38 @@ +#!/usr/bin/perl +# +# Build a v2 index file from entries listed on stdin. +# Each line: "octalmode hex-oid name" +# Output: binary index written to stdout. +# +# This bypasses all D/F safety checks in add_index_entry(), simulating +# what happens when code uses ADD_CACHE_JUST_APPEND to bulk-load entries. +use strict; +use warnings; +use Digest::SHA qw(sha1 sha256); + +my $hash_algo = $ENV{'GIT_DEFAULT_HASH'} || 'sha1'; +my $hash_func = $hash_algo eq 'sha256' ? \&sha256 : \&sha1; + +my @entries; +while (my $line = ) { + chomp $line; + my ($mode, $oid_hex, $name) = split(/ /, $line, 3); + push @entries, [$mode, $oid_hex, $name]; +} + +my $body = "DIRC" . pack("NN", 2, scalar @entries); + +for my $ent (@entries) { + my ($mode, $oid_hex, $name) = @{$ent}; + # 10 x 32-bit stat fields (zeroed), with mode in position 7 + my $stat = pack("N10", 0, 0, 0, 0, 0, 0, oct($mode), 0, 0, 0); + my $oid = pack("H*", $oid_hex); + my $flags = pack("n", length($name) & 0xFFF); + my $entry = $stat . $oid . $flags . $name . "\0"; + # Pad to 8-byte boundary + while (length($entry) % 8) { $entry .= "\0"; } + $body .= $entry; +} + +binmode STDOUT; +print $body . $hash_func->($body); diff --git a/t/t0093-verify-cache-df-gap.sh b/t/t0093-verify-cache-df-gap.sh new file mode 100755 index 00000000000000..0b6829d805269d --- /dev/null +++ b/t/t0093-verify-cache-df-gap.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +test_description='verify_cache() must catch non-adjacent D/F conflicts + +Ensure that verify_cache() can complain about bad entries like: + + docs <-- submodule + docs-internal/... <-- sorts here because "-" < "/" + docs/... <-- D/F conflict with "docs" above, not adjacent + +In order to test verify_cache, we directly construct a corrupt index +(bypassing the D/F safety checks in add_index_entry) and verify that +write-tree rejects it. +' + +. ./test-lib.sh + +if ! test_have_prereq PERL +then + skip_all='skipping verify_cache D/F tests; Perl not available' + test_done +fi + +# Build a v2 index from entries on stdin, bypassing D/F checks. +# Each line: "octalmode hex-oid name" (entries must be pre-sorted). +build_corrupt_index () { + perl "$TEST_DIRECTORY/t0093-direct-index-write.pl" >"$1" +} + +test_expect_success 'setup objects' ' + test_commit base && + BLOB=$(git rev-parse HEAD:base.t) && + SUB_COMMIT=$(git rev-parse HEAD) +' + +test_expect_success 'adjacent D/F conflict is caught by verify_cache' ' + cat >index-entries <<-EOF && + 0160000 $SUB_COMMIT docs + 0100644 $BLOB docs/requirements.txt + EOF + build_corrupt_index .git/index err && + test_grep "You have both docs and docs/requirements.txt" err +' + +test_expect_success 'non-adjacent D/F conflict is caught by verify_cache' ' + cat >index-entries <<-EOF && + 0160000 $SUB_COMMIT docs + 0100644 $BLOB docs-internal/README.md + 0100644 $BLOB docs/requirements.txt + EOF + build_corrupt_index .git/index err && + test_grep "You have both docs and docs/requirements.txt" err +' + +test_done diff --git a/t/t6422-merge-rename-corner-cases.sh b/t/t6422-merge-rename-corner-cases.sh index e18d5a227d54f7..81b645bb3bdc5b 100755 --- a/t/t6422-merge-rename-corner-cases.sh +++ b/t/t6422-merge-rename-corner-cases.sh @@ -1525,4 +1525,58 @@ test_expect_success 'submodule/directory preliminary conflict' ' ) ' +# Testcase: submodule/directory conflict with duplicate tree entries +# One side has a path as a gitlink (submodule). The other side replaces +# the gitlink with a directory. A third-party tool creates a tree on the +# submodule side that has *both* a gitlink and a tree entry for the same +# path (adding a file inside the submodule path ignoring that there's a +# gitlink there). collect_merge_info_callback() should detect the +# duplicate and abort rather than silently corrupting its bookkeeping. + +test_expect_success 'duplicate tree entries trigger an error' ' + test_when_finished "rm -rf duplicate-entry" && + git init duplicate-entry && + ( + cd duplicate-entry && + + # Base commit: "docs" is a gitlink (submodule) + empty_tree=$(git mktree file.txt && + git add file.txt && + git commit -m base && + + # side1: remove the gitlink, replace with a directory + git checkout -b side1 && + git rm --cached docs && + mkdir -p docs && + echo hello >docs/requirements.txt && + git add docs/requirements.txt && + git commit -m "side1: submodule to directory" && + + # side2: keep the gitlink but craft a tree that also + # contains a tree entry for "docs" (simulating a tool + # that adds files inside a submodule path without + # removing the gitlink first). + git checkout main && + git checkout -b side2 && + blob_oid=$(echo world | git hash-object -w --stdin) && + docs_tree=$(printf "100644 blob %s\trequirements.txt\n" \ + "$blob_oid" | git mktree) && + cur_tree=$(git rev-parse HEAD^{tree}) && + git cat-file -p $cur_tree >tree-listing && + printf "040000 tree %s\tdocs\n" "$docs_tree" >>tree-listing && + new_tree=$(git mktree err && + test_grep "duplicate entries" err + ) +' + test_done