From 53f9561055e8377a18a2430262857a3650c73dab Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Mon, 11 May 2026 12:59:11 +0000 Subject: [PATCH 01/11] commit-reach: introduce merge_base_flags enum Replace the boolean ignore_missing_commits parameter in paint_down_to_common() with an enum merge_base_flags, and thread the flags through merge_bases_many(), get_merge_bases_many_0(), and the public repo_get_merge_bases_many_dirty() API. This makes callsites with boolean parameters easier to read and prepares the function for additional flags in a subsequent commit. No functional change: the single caller that used ignore_missing_commits (repo_in_merge_bases_many) now sets MERGE_BASE_IGNORE_MISSING_COMMITS in the flags word, and all other callers pass 0. Signed-off-by: Kristofer Karlsson Signed-off-by: Junio C Hamano --- builtin/merge-base.c | 3 ++- commit-reach.c | 23 +++++++++++++++-------- commit-reach.h | 5 +++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/builtin/merge-base.c b/builtin/merge-base.c index c7ee97fa6ac62a..9b50b4660ea64f 100644 --- a/builtin/merge-base.c +++ b/builtin/merge-base.c @@ -14,7 +14,8 @@ static int show_merge_base(struct commit **rev, size_t rev_nr, int show_all) struct commit_list *result = NULL, *r; if (repo_get_merge_bases_many_dirty(the_repository, rev[0], - rev_nr - 1, rev + 1, &result) < 0) { + rev_nr - 1, rev + 1, + 0, &result) < 0) { commit_list_free(result); return -1; } diff --git a/commit-reach.c b/commit-reach.c index d3a9b3ed6fe561..766ba1156af63e 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -54,7 +54,7 @@ static int paint_down_to_common(struct repository *r, struct commit *one, int n, struct commit **twos, timestamp_t min_generation, - int ignore_missing_commits, + enum merge_base_flags mb_flags, struct commit_list **result) { struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; @@ -118,7 +118,7 @@ static int paint_down_to_common(struct repository *r, * corrupt commits would already have been * dispatched with a `die()`. */ - if (ignore_missing_commits) + if (mb_flags & MERGE_BASE_IGNORE_MISSING_COMMITS) return 0; return error(_("could not parse commit %s"), oid_to_hex(&p->object.oid)); @@ -136,6 +136,7 @@ static int paint_down_to_common(struct repository *r, static int merge_bases_many(struct repository *r, struct commit *one, int n, struct commit **twos, + enum merge_base_flags mb_flags, struct commit_list **result) { struct commit_list *list = NULL, **tail = result; @@ -165,7 +166,7 @@ static int merge_bases_many(struct repository *r, oid_to_hex(&twos[i]->object.oid)); } - if (paint_down_to_common(r, one, n, twos, 0, 0, &list)) { + if (paint_down_to_common(r, one, n, twos, 0, mb_flags, &list)) { commit_list_free(list); return -1; } @@ -425,6 +426,7 @@ static int get_merge_bases_many_0(struct repository *r, size_t n, struct commit **twos, int cleanup, + enum merge_base_flags mb_flags, struct commit_list **result) { struct commit_list *list, **tail = result; @@ -432,7 +434,7 @@ static int get_merge_bases_many_0(struct repository *r, size_t cnt, i; int ret; - if (merge_bases_many(r, one, n, twos, result) < 0) + if (merge_bases_many(r, one, n, twos, mb_flags, result) < 0) return -1; for (i = 0; i < n; i++) { if (one == twos[i]) @@ -475,16 +477,17 @@ int repo_get_merge_bases_many(struct repository *r, struct commit **twos, struct commit_list **result) { - return get_merge_bases_many_0(r, one, n, twos, 1, result); + return get_merge_bases_many_0(r, one, n, twos, 1, 0, result); } int repo_get_merge_bases_many_dirty(struct repository *r, struct commit *one, size_t n, struct commit **twos, + enum merge_base_flags mb_flags, struct commit_list **result) { - return get_merge_bases_many_0(r, one, n, twos, 0, result); + return get_merge_bases_many_0(r, one, n, twos, 0, mb_flags, result); } int repo_get_merge_bases(struct repository *r, @@ -492,7 +495,7 @@ int repo_get_merge_bases(struct repository *r, struct commit *two, struct commit_list **result) { - return get_merge_bases_many_0(r, one, 1, &two, 1, result); + return get_merge_bases_many_0(r, one, 1, &two, 1, 0, result); } /* @@ -537,6 +540,10 @@ int repo_in_merge_bases_many(struct repository *r, struct commit *commit, struct commit_list *bases = NULL; int ret = 0, i; timestamp_t generation, max_generation = GENERATION_NUMBER_ZERO; + enum merge_base_flags mb_flags = 0; + + if (ignore_missing_commits) + mb_flags |= MERGE_BASE_IGNORE_MISSING_COMMITS; if (repo_parse_commit(r, commit)) return ignore_missing_commits ? 0 : -1; @@ -555,7 +562,7 @@ int repo_in_merge_bases_many(struct repository *r, struct commit *commit, if (paint_down_to_common(r, commit, nr_reference, reference, - generation, ignore_missing_commits, &bases)) + generation, mb_flags, &bases)) ret = -1; else if (commit->object.flags & PARENT2) ret = 1; diff --git a/commit-reach.h b/commit-reach.h index 6012402dfcfe45..a3f2cd80ebf180 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -17,10 +17,15 @@ int repo_get_merge_bases_many(struct repository *r, struct commit *one, size_t n, struct commit **twos, struct commit_list **result); +enum merge_base_flags { + MERGE_BASE_IGNORE_MISSING_COMMITS = (1 << 0), +}; + /* To be used only when object flags after this call no longer matter */ int repo_get_merge_bases_many_dirty(struct repository *r, struct commit *one, size_t n, struct commit **twos, + enum merge_base_flags mb_flags, struct commit_list **result); int get_octopus_merge_bases(struct commit_list *in, struct commit_list **result); From 93e5b1680e30650ceca889a56429a055f17033bd Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Mon, 11 May 2026 12:59:12 +0000 Subject: [PATCH 02/11] commit-reach: early exit paint_down_to_common for single merge-base Commits not in the commit-graph get GENERATION_NUMBER_INFINITY and sort to the top of the priority queue. After those, commits with finite generation numbers are popped in non-increasing order. When MERGE_BASE_FIND_ALL is not set the first doubly-painted commit with a finite generation is therefore a best merge-base: no commit still in the queue can be a descendant of it. Skip the expensive STALE drain in this case. Add MERGE_BASE_FIND_ALL to the merge_base_flags enum. Callers that need every merge-base (repo_get_merge_bases_many, repo_get_merge_bases, repo_in_merge_bases_many, remove_redundant_no_gen) pass the flag to preserve existing behavior. git merge-base (without --all) passes 0, triggering the early exit. On a 2.2M-commit merge-heavy monorepo with commit-graph: HEAD vs ~500: 5,229ms -> 24ms HEAD vs ~1000: 4,214ms -> 39ms HEAD vs ~5000: 3,799ms -> 46ms HEAD vs ~10000: 3,827ms -> 61ms Signed-off-by: Kristofer Karlsson Signed-off-by: Junio C Hamano --- builtin/merge-base.c | 3 ++- commit-reach.c | 19 +++++++++++++++---- commit-reach.h | 7 ++++++- t/t6600-test-reach.sh | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 6 deletions(-) diff --git a/builtin/merge-base.c b/builtin/merge-base.c index 9b50b4660ea64f..a87011c6cdab46 100644 --- a/builtin/merge-base.c +++ b/builtin/merge-base.c @@ -11,11 +11,12 @@ static int show_merge_base(struct commit **rev, size_t rev_nr, int show_all) { + enum merge_base_flags flags = show_all ? MERGE_BASE_FIND_ALL : 0; struct commit_list *result = NULL, *r; if (repo_get_merge_bases_many_dirty(the_repository, rev[0], rev_nr - 1, rev + 1, - 0, &result) < 0) { + flags, &result) < 0) { commit_list_free(result); return -1; } diff --git a/commit-reach.c b/commit-reach.c index 766ba1156af63e..5a52be90a66f5a 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -97,6 +97,14 @@ static int paint_down_to_common(struct repository *r, if (!(commit->object.flags & RESULT)) { commit->object.flags |= RESULT; tail = commit_list_append(commit, tail); + /* + * The queue is generation-ordered; no + * remaining common ancestor can be a + * descendant of this one. + */ + if (!(mb_flags & MERGE_BASE_FIND_ALL) && + generation < GENERATION_NUMBER_INFINITY) + break; } /* Mark parents of a found merge stale */ flags |= STALE; @@ -247,7 +255,8 @@ static int remove_redundant_no_gen(struct repository *r, min_generation = curr_generation; } if (paint_down_to_common(r, array[i], filled, - work, min_generation, 0, &common)) { + work, min_generation, + MERGE_BASE_FIND_ALL, &common)) { clear_commit_marks(array[i], all_flags); clear_commit_marks_many(filled, work, all_flags); commit_list_free(common); @@ -477,7 +486,8 @@ int repo_get_merge_bases_many(struct repository *r, struct commit **twos, struct commit_list **result) { - return get_merge_bases_many_0(r, one, n, twos, 1, 0, result); + return get_merge_bases_many_0(r, one, n, twos, 1, + MERGE_BASE_FIND_ALL, result); } int repo_get_merge_bases_many_dirty(struct repository *r, @@ -495,7 +505,8 @@ int repo_get_merge_bases(struct repository *r, struct commit *two, struct commit_list **result) { - return get_merge_bases_many_0(r, one, 1, &two, 1, 0, result); + return get_merge_bases_many_0(r, one, 1, &two, 1, + MERGE_BASE_FIND_ALL, result); } /* @@ -540,7 +551,7 @@ int repo_in_merge_bases_many(struct repository *r, struct commit *commit, struct commit_list *bases = NULL; int ret = 0, i; timestamp_t generation, max_generation = GENERATION_NUMBER_ZERO; - enum merge_base_flags mb_flags = 0; + enum merge_base_flags mb_flags = MERGE_BASE_FIND_ALL; if (ignore_missing_commits) mb_flags |= MERGE_BASE_IGNORE_MISSING_COMMITS; diff --git a/commit-reach.h b/commit-reach.h index a3f2cd80ebf180..3f3a563d8a5dd1 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -19,9 +19,14 @@ int repo_get_merge_bases_many(struct repository *r, struct commit_list **result); enum merge_base_flags { MERGE_BASE_IGNORE_MISSING_COMMITS = (1 << 0), + MERGE_BASE_FIND_ALL = (1 << 1), }; -/* To be used only when object flags after this call no longer matter */ +/* + * To be used only when object flags after this call no longer matter. + * Without MERGE_BASE_FIND_ALL and with generation numbers available, + * returns after finding the first merge-base, skipping the STALE drain. + */ int repo_get_merge_bases_many_dirty(struct repository *r, struct commit *one, size_t n, struct commit **twos, diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index dc0421ed2f3726..51c23b76833126 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -882,4 +882,44 @@ test_expect_success 'rev-list --maximal-only matches merge-base --independent' ' test_cmp expect.sorted actual.sorted ' +# The following tests verify the early-exit optimisation in +# paint_down_to_common when merge-base is invoked without --all. +# Each test checks all four commit-graph configurations. + +merge_base_all_modes () { + test_when_finished rm -rf .git/objects/info/commit-graph && + git merge-base "$@" >actual && + test_cmp expect actual && + cp commit-graph-full .git/objects/info/commit-graph && + git merge-base "$@" >actual && + test_cmp expect actual && + cp commit-graph-half .git/objects/info/commit-graph && + git merge-base "$@" >actual && + test_cmp expect actual && + cp commit-graph-no-gdat .git/objects/info/commit-graph && + git merge-base "$@" >actual && + test_cmp expect actual +} + +test_expect_success 'merge-base without --all (unique base)' ' + git rev-parse commit-5-3 >expect && + merge_base_all_modes commit-5-7 commit-8-3 +' + +test_expect_success 'merge-base without --all is one of --all results' ' + test_when_finished rm -rf .git/objects/info/commit-graph && + + cp commit-graph-full .git/objects/info/commit-graph && + git merge-base --all commit-5-7 commit-4-8 commit-6-6 commit-8-3 >all && + git merge-base commit-5-7 commit-4-8 commit-6-6 commit-8-3 >single && + test_line_count = 1 single && + grep -F -f single all && + + cp commit-graph-half .git/objects/info/commit-graph && + git merge-base --all commit-5-7 commit-4-8 commit-6-6 commit-8-3 >all && + git merge-base commit-5-7 commit-4-8 commit-6-6 commit-8-3 >single && + test_line_count = 1 single && + grep -F -f single all +' + test_done From 61f711de4b0fda3464ef54637b3600a6a5aab214 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Mon, 11 May 2026 12:21:53 +0000 Subject: [PATCH 03/11] sequencer: remove todo_add_branch_context.commit The 'commit' field in 'struct todo_add_branch_context' is unused. It's written to, but never read from. add_decorations_to_list() gets the commit passed to it explicitly as an argument. Signed-off-by: Abhinav Gupta Signed-off-by: Junio C Hamano --- sequencer.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sequencer.c b/sequencer.c index b7d8dca47f4a58..19839da1e6b96e 100644 --- a/sequencer.c +++ b/sequencer.c @@ -6409,7 +6409,6 @@ struct todo_add_branch_context { size_t items_nr; size_t items_alloc; struct strbuf *buf; - struct commit *commit; struct string_list refs_to_oids; }; @@ -6498,7 +6497,6 @@ static int todo_list_add_update_ref_commands(struct todo_list *todo_list) ctx.items[ctx.items_nr++] = todo_list->items[i++]; if (item->commit) { - ctx.commit = item->commit; add_decorations_to_list(item->commit, &ctx); } } From a9ce8526dcc8592429ae73c8f73b29792aa30aa1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 12 May 2026 12:20:22 -0400 Subject: [PATCH 04/11] pretty: drop strbuf pre-sizing from add_rfc2047() At the top of add_rfc2047() we do this: strbuf_grow(sb, len * 3 + strlen(encoding) + 100); where "len" is the size of the header (like an author name) we are about to encode into the buffer. This pre-sizing is purely an optimization; we use strbuf_addf() and friends to actually write into the buffer, and they will grow the buffer as necessary. But there's a problem with the code above: the input can be arbitrarily large, so we might overflow a size_t while doing that computation, ending up with a too-small allocation request. Overflowing requires an impractically large input on a 64-bit system, but is easy to demonstrate on a 32-bit system with a commit whose author name is ~1.4GB. Because this pre-sizing is just an optimization, there's no real harm. We'll start with a smaller buffer and grow it as necessary. But it _looks_ like a vulnerability, since some other code may pre-size a strbuf and then write directly into its buffer. So it's worth avoiding the overflow in the first place. The obvious way to do that is via checked operations like st_add() and friends. But taking a step back, is this pre-sizing actually helping anything? The computation goes all the way back to 4234a76167 (Extend --pretty=oneline to cover the first paragraph,, 2007-06-11), but back then we really were sizing the array to write into directly! In 674d172730 (Rework pretty_print_commit to use strbufs instead of custom buffers., 2007-09-10) that switched to a strbuf, and at that point it was a pure optimization. Is the optimization helping? I don't think so. Even for a gigantic case like the 1.4GB author name, I couldn't measure any slowdown when removing it. And most input will be much smaller, and added to a running strbuf containing the rest of the email-header output. We can just rely on strbuf's usual amortized-linear growth. So deleting the line seems like the best way to go. It eliminates the integer overflow and makes the code a tiny bit simpler. Reported-by: Luke Martin Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pretty.c | 1 - 1 file changed, 1 deletion(-) diff --git a/pretty.c b/pretty.c index e0646bbc5d49cc..55d44f19cdd295 100644 --- a/pretty.c +++ b/pretty.c @@ -399,7 +399,6 @@ static void add_rfc2047(struct strbuf *sb, const char *line, size_t len, int i; int line_len = last_line_length(sb); - strbuf_grow(sb, len * 3 + strlen(encoding) + 100); strbuf_addf(sb, "=?%s?q?", encoding); line_len += strlen(encoding) + 5; /* 5 for =??q? */ From b92387cd55f9fa66edd6e646ec5c17be8d72609b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 12 May 2026 12:26:19 -0400 Subject: [PATCH 05/11] http: handle absolute-path alternates from server root When a dumb http server reports alternates with an absolute path, we try to paste that onto the root of the URL we're trying to fetch from. So if we go to "http://example.com/path/to/child.git" and it tells us about an alternate at "/parent.git", we'll hit "http://example.com/parent.git". But there's a bug in computing the base when the URL does not have any path component at all, like "http://example.com". When looking for the first slash after the host, strchr() returns NULL, and we compute a nonsense value for the length of the host portion. And then when we use that length to copy the base of the URL into a strbuf, we're likely to fail. The security implications are minimal here. We store the nonsense length ("serverlen") as an int, so on a 64-bit system it may effectively be anything (it is zero minus a 64-bit heap pointer, then truncated to 32-bits and stuffed into a signed value). When we feed that length to strbuf_add(), it is cast into a size_t and one of four things will happen: 1. If serverlen was negative, it will turn into a very large positive value and strbuf_add() will fail to allocate, ending the program. Ditto if serverlen was positive but just very large. This doesn't really get an attacker anything; the victim will just fail to clone their evil repo. 2. If serverlen was small enough, we'll successfully extend the target strbuf, and then copy an arbitrary set of bytes from "base". And then one of these is true: a. That set of bytes is much larger than the length of the "base" string. This is an out-of-bounds read, but there's no out-of-bounds write, since the strbuf code both allocates and copies using the same size_t. This is likely to cause a segfault as we try to read unmapped pages of memory. b. Like (2a), but if the set of bytes is small enough we might not segfault. We might read random memory from the process and copy it into the "target" strbuf. What happens then? We know that "base" ends with a NUL terminator, which will be copied into "target" as well. So even though target.len might be 1000 bytes (or whatever), when interpreted as a NUL-terminated string, target.buf is still the exact same string as "base". And that's all we ever do with target: pass it around as a C string, and then eventually strbuf_detach() it to become a C string. So even though there was arbitrary memory copied into the strbuf, we never access it. c. The other interesting case is when serverlen is actually _shorter_ than the length of base. And there we truncate the string. Probably in a way that makes it totally invalid, but if you were very unlucky you could turn something like: http://victim.com.evil.domain:8000 into: http://victim.com Which looks like the start of a redirect attack, except that the attacker could just have written "http://victim.com" in the first place! Either way we feed it to is_alternate_allowed(), which is where we check redirect and protocol rules. I think we can just treat this like a regular bug. And it's quite a weird setup in the first place, as it implies that the root of the web server is serving a repository (i.e., that you can get something useful from "http://example.com/info/refs"). The bug has been there since b3661567cf ([PATCH] Add support for alternates in HTTP, 2005-09-14) without anybody noticing. I kind of doubt anybody really cares about making this work, but it's easy enough to do so: the host-portion of the URL ends at either the first slash or the end-of-string. So we can just replace strchr() with strchrnul(). The test setup is a little gross, as we take over the httpd document root by shoving our bare-repo components into it. But it demonstrates the problem and shows that our solution actually allows the alternate to function, if the server is configured to allow it. Reported-by: slonkazoid Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http-walker.c | 2 +- t/t5550-http-fetch-dumb.sh | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/http-walker.c b/http-walker.c index e886e6486646d1..badf07e499eac5 100644 --- a/http-walker.c +++ b/http-walker.c @@ -268,7 +268,7 @@ static void process_alternates_response(void *callback_data) */ const char *colon_ss = strstr(base,"://"); if (colon_ss) { - serverlen = (strchr(colon_ss + 3, '/') + serverlen = (strchrnul(colon_ss + 3, '/') - base); okay = 1; } diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index ed0ad66fade32b..802400b404ac2f 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -530,4 +530,24 @@ test_expect_success 'dumb http can fetch index v1' ' git -C idx-v1 fsck ' +test_expect_success 'absolute-path alternate when url has no path' ' + src=$HTTPD_DOCUMENT_ROOT_PATH/repo.git && + alt=absolute-alt.git && + git clone --bare --shared "$src" "$alt" && + + # Our repo has an alternate pointing to the absolute filesystem path, + # but that will not make any sense to an http client. So we will + # manually give it the equivalent path that the http server will + # understand. + echo "/dumb/repo.git/objects" >"$alt/objects/info/http-alternates" && + + # Now make our alt repository available at the root of the http + # server without any path (i.e., just http://localhost:1234). + git -C "$alt" update-server-info && + mv absolute-alt.git/* "$HTTPD_DOCUMENT_ROOT_PATH" && + + git -c http.followRedirects=true clone "$HTTPD_URL" alt-clone.git 2>err && + test_grep "adding alternate object store: $HTTPD_URL/dumb/repo.git" err +' + test_done From 321f0ea17b3bcb70867fad430e972548281803f0 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 12 May 2026 18:10:20 +0000 Subject: [PATCH 06/11] diff: reject negative values for --inter-hunk-context Negative values for --inter-hunk-context produce structurally invalid diff output with overlapping hunks: $ git log -1 -p -U3 --inter-hunk-context=-100 791aeddfa2 \ -- git-compat-util.h | grep '^@@' @@ -110,6 +110,9 @@ @@ -115,6 +118,9 @@ @@ -116,6 +122,7 @@ Hunk 1 covers lines 110-115, hunk 2 starts at 115 (overlap), hunk 3 starts at 116 (overlaps both). The resulting patch cannot be applied. The config variable diff.interHunkContext already rejects negative values, but the command line option does not. Change the type of diff_options.interhunkcontext and its static default from int to unsigned int, and switch the option parser from OPT_INTEGER_F to OPT_UNSIGNED. This rejects negative values at parse time via git_parse_unsigned() and enforces the correct type at compile time via BARF_UNLESS_UNSIGNED. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- diff.c | 13 ++++++------- diff.h | 2 +- t/t4032-diff-inter-hunk-context.sh | 6 ++++++ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/diff.c b/diff.c index 397e38b41cc6fa..5df28e49c5057a 100644 --- a/diff.c +++ b/diff.c @@ -61,7 +61,7 @@ static enum git_colorbool diff_use_color_default = GIT_COLOR_UNKNOWN; static int diff_color_moved_default; static int diff_color_moved_ws_default; static int diff_context_default = 3; -static int diff_interhunk_context_default; +static unsigned int diff_interhunk_context_default; static char *diff_word_regex_cfg; static struct external_diff external_diff_cfg; static char *diff_order_file_cfg; @@ -388,10 +388,10 @@ int git_diff_ui_config(const char *var, const char *value, return 0; } if (!strcmp(var, "diff.interhunkcontext")) { - diff_interhunk_context_default = git_config_int(var, value, - ctx->kvi); - if (diff_interhunk_context_default < 0) + int val = git_config_int(var, value, ctx->kvi); + if (val < 0) return -1; + diff_interhunk_context_default = val; return 0; } if (!strcmp(var, "diff.renames")) { @@ -6111,9 +6111,8 @@ struct option *add_diff_options(const struct option *opts, OPT_CALLBACK_F(0, "default-prefix", options, NULL, N_("use default prefixes a/ and b/"), PARSE_OPT_NONEG | PARSE_OPT_NOARG, diff_opt_default_prefix), - OPT_INTEGER_F(0, "inter-hunk-context", &options->interhunkcontext, - N_("show context between diff hunks up to the specified number of lines"), - PARSE_OPT_NONEG), + OPT_UNSIGNED(0, "inter-hunk-context", &options->interhunkcontext, + N_("show context between diff hunks up to the specified number of lines")), OPT_CALLBACK_F(0, "output-indicator-new", &options->output_indicators[OUTPUT_INDICATOR_NEW], N_(""), diff --git a/diff.h b/diff.h index 7eb84aadf40c41..033d633db41129 100644 --- a/diff.h +++ b/diff.h @@ -296,7 +296,7 @@ struct diff_options { /* Number of context lines to generate in patch output. */ int context; - int interhunkcontext; + unsigned int interhunkcontext; /* Affects the way detection logic for complete rewrites, renames and * copies. diff --git a/t/t4032-diff-inter-hunk-context.sh b/t/t4032-diff-inter-hunk-context.sh index bada0cbd32f764..bec1676f8d5a41 100755 --- a/t/t4032-diff-inter-hunk-context.sh +++ b/t/t4032-diff-inter-hunk-context.sh @@ -114,4 +114,10 @@ test_expect_success 'diff.interHunkContext invalid' ' test_must_fail git diff ' +test_expect_success '--inter-hunk-context rejects negative value' ' + test_unconfig diff.interHunkContext && + test_must_fail git diff --inter-hunk-context=-1 2>err && + test_grep "expects a non-negative integer" err +' + test_done From 94a9e6934c5978a150de549046e68dad608bcf9f Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 12 May 2026 18:10:21 +0000 Subject: [PATCH 07/11] diff: reject negative values for -U/--unified Passing a negative value to -U is silently accepted and produces corrupt unified diff output with malformed hunk headers: $ git log -1 -p -U-500 -- GIT-VERSION-GEN | grep '^@@' @@ -503,999- +503,999- @@ Line 503 of a 106-line file, count "999-" is not a valid integer. The config variable diff.context already rejects negative values, but the command line callback diff_opt_unified() uses strtol() with no range check. Change the type of diff_options.context and its static default from int to unsigned int, matching the change to interhunkcontext in the previous commit. The type change requires reworking the callback and config parsing to validate in a local variable before assigning to the now-unsigned field. Unlike --inter-hunk-context which could be converted to OPT_UNSIGNED, -U needs OPT_CALLBACK_F for PARSE_OPT_OPTARG (bare -U with no value enables patch output). Add a range check in the callback instead. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- diff.c | 12 ++++++++---- diff.h | 2 +- t/t4055-diff-context.sh | 5 +++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/diff.c b/diff.c index 5df28e49c5057a..1771b2c444c005 100644 --- a/diff.c +++ b/diff.c @@ -60,7 +60,7 @@ static int diff_suppress_blank_empty; static enum git_colorbool diff_use_color_default = GIT_COLOR_UNKNOWN; static int diff_color_moved_default; static int diff_color_moved_ws_default; -static int diff_context_default = 3; +static unsigned int diff_context_default = 3; static unsigned int diff_interhunk_context_default; static char *diff_word_regex_cfg; static struct external_diff external_diff_cfg; @@ -382,9 +382,10 @@ int git_diff_ui_config(const char *var, const char *value, return 0; } if (!strcmp(var, "diff.context")) { - diff_context_default = git_config_int(var, value, ctx->kvi); - if (diff_context_default < 0) + int val = git_config_int(var, value, ctx->kvi); + if (val < 0) return -1; + diff_context_default = val; return 0; } if (!strcmp(var, "diff.interhunkcontext")) { @@ -5924,9 +5925,12 @@ static int diff_opt_unified(const struct option *opt, BUG_ON_OPT_NEG(unset); if (arg) { - options->context = strtol(arg, &s, 10); + long val = strtol(arg, &s, 10); if (*s) return error(_("%s expects a numerical value"), "--unified"); + if (val < 0) + return error(_("%s expects a non-negative integer"), "--unified"); + options->context = val; } enable_patch_output(&options->output_format); diff --git a/diff.h b/diff.h index 033d633db41129..bb5cddaf3499e9 100644 --- a/diff.h +++ b/diff.h @@ -294,7 +294,7 @@ struct diff_options { enum git_colorbool use_color; /* Number of context lines to generate in patch output. */ - int context; + unsigned int context; unsigned int interhunkcontext; diff --git a/t/t4055-diff-context.sh b/t/t4055-diff-context.sh index 1384a8195705e3..b26f6eea7c8332 100755 --- a/t/t4055-diff-context.sh +++ b/t/t4055-diff-context.sh @@ -82,6 +82,11 @@ test_expect_success 'negative integer config parsing' ' test_grep "bad config variable" output ' +test_expect_success '-U-1 is rejected' ' + test_must_fail git diff -U-1 2>err && + test_grep "expects a non-negative integer" err +' + test_expect_success '-U0 is valid, so is diff.context=0' ' test_config diff.context 0 && git diff >output && From 4517e4ca104ca93b51b4acf4d2c62e9325bbb623 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 12 May 2026 18:10:22 +0000 Subject: [PATCH 08/11] xdiff: guard against negative context lengths The xdemitconf_t fields ctxlen and interhunkctxlen are typed as long (signed), but negative values are not meaningful for context line counts. Unlike the diff_options fields changed in the previous two commits, these cannot be converted to unsigned because the xdiff arithmetic relies on signed subtraction: s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0); If ctxlen were unsigned long, the signed operand would be implicitly converted to unsigned, and the subtraction would wrap to a large positive value when i1 < ctxlen, defeating the XDL_MAX clamp. The signed type is required for correct context-window calculations. The previous two commits reject negative values at the parse layer for --inter-hunk-context and -U/--unified, so negative values should no longer reach xdiff in normal use. Add BUG() guards at the top of xdl_get_hunk() as defense in depth to catch programming errors in current or future callers that bypass option parsing. xdl_get_hunk() is called by both xdl_emit_diff() and xdl_call_hunk_func(), so a single guard covers all xdiff consumers. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- xdiff/xemit.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/xdiff/xemit.c b/xdiff/xemit.c index 04f7e9193b61f0..7cd9cf0a44f4d6 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -46,12 +46,20 @@ static long saturating_add(long a, long b) xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) { xdchange_t *xch, *xchp, *lxch; - long max_common = saturating_add(saturating_add(xecfg->ctxlen, - xecfg->ctxlen), - xecfg->interhunkctxlen); - long max_ignorable = xecfg->ctxlen; + long max_common; + long max_ignorable; long ignored = 0; /* number of ignored blank lines */ + if (xecfg->ctxlen < 0) + BUG("negative context length: %ld", xecfg->ctxlen); + if (xecfg->interhunkctxlen < 0) + BUG("negative inter-hunk context length: %ld", xecfg->interhunkctxlen); + + max_common = saturating_add(saturating_add(xecfg->ctxlen, + xecfg->ctxlen), + xecfg->interhunkctxlen); + max_ignorable = xecfg->ctxlen; + /* remove ignorable changes that are too far before other changes */ for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) { xch = xchp->next; From d654777d73061b112c7ed7bad604b72019f5aafa Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Tue, 12 May 2026 18:10:23 +0000 Subject: [PATCH 09/11] parse-options: clarify what "negated" means for PARSE_OPT_NONEG The documentation says the flag prevents an option from being "negated" without specifying what that means. Add a parenthetical to clarify that it rejects the "--no-