From cdeef283bcf8529fc858cfe7d18a7522294519c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Tue, 12 May 2026 13:56:00 +0200 Subject: [PATCH 1/4] strbuf: add strbuf_add_uint() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit strbuf_addf() calls vsnprintf(3) underneath, which supports a plethora of formatting options. We can avoid its overhead in basic cases by providing specialized functions like strbuf_addstr() for strings. Add another one, strbuf_add_uint(), for unsigned integers. Prepare the number string in a temporary buffer. Make it big enough for any unsigned integer value: A decimal digit can represent ln(10)/ln(2) ≈ 3.32 bits; dividing the number of bits of uintmax_t by 3.3 and rounding up gives a sufficiently close conservative size estimate. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- strbuf.c | 12 ++++++++++++ strbuf.h | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/strbuf.c b/strbuf.c index 3e04addc22..9731ecdc1f 100644 --- a/strbuf.c +++ b/strbuf.c @@ -361,6 +361,18 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...) va_end(ap); } +void strbuf_add_uint(struct strbuf *sb, uintmax_t value) +{ + char buf[DIV_ROUND_UP(bitsizeof(value) * 10, 33)]; + char *end = buf + sizeof(buf); + char *p = end; + + do + *--p = "0123456789"[value % 10]; + while (value /= 10); + strbuf_add(sb, p, end - p); +} + static void add_lines(struct strbuf *out, const char *prefix, const char *buf, size_t size, diff --git a/strbuf.h b/strbuf.h index 06e284f9cc..1089ae687b 100644 --- a/strbuf.h +++ b/strbuf.h @@ -410,6 +410,12 @@ void strbuf_humanise_rate(struct strbuf *buf, off_t bytes); __attribute__((format (printf,2,3))) void strbuf_addf(struct strbuf *sb, const char *fmt, ...); + +/** + * Add an unsigned decimal number. + */ +void strbuf_add_uint(struct strbuf *sb, uintmax_t value); + /** * Add a formatted string prepended by a comment character and a * blank to the buffer. 
From 8feb5702163a32384d098e2c9ad3987928f8c447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Tue, 12 May 2026 13:56:01 +0200 Subject: [PATCH 2/4] cat-file: use strbuf_add_uint() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Speed up printing of objectsize atoms by using the specialized function strbuf_add_uint() instead of the general-purpose function strbuf_addf(): Benchmark 1: ./git_main cat-file --batch-all-objects --batch-check='%(objectsize)' Time (mean ± σ): 751.7 ms ± 1.5 ms [User: 733.5 ms, System: 17.1 ms] Range (min … max): 750.5 ms … 755.0 ms 10 runs Benchmark 2: ./git cat-file --batch-all-objects --batch-check='%(objectsize)' Time (mean ± σ): 720.4 ms ± 0.4 ms [User: 701.9 ms, System: 16.7 ms] Range (min … max): 719.7 ms … 721.2 ms 10 runs Summary ./git cat-file --batch-all-objects --batch-check='%(objectsize)' ran 1.04 ± 0.00 times faster than ./git_main cat-file --batch-all-objects --batch-check='%(objectsize)' Benchmark 1: ./git_main cat-file --batch-all-objects --batch-check='%(objectsize:disk)' Time (mean ± σ): 404.6 ms ± 0.9 ms [User: 397.8 ms, System: 5.7 ms] Range (min … max): 403.3 ms … 405.9 ms 10 runs Benchmark 2: ./git cat-file --batch-all-objects --batch-check='%(objectsize:disk)' Time (mean ± σ): 378.3 ms ± 0.9 ms [User: 371.2 ms, System: 5.9 ms] Range (min … max): 376.8 ms … 380.2 ms 10 runs Summary ./git cat-file --batch-all-objects --batch-check='%(objectsize:disk)' ran 1.07 ± 0.00 times faster than ./git_main cat-file --batch-all-objects --batch-check='%(objectsize:disk)' Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index d9fbad5358..62160ca9d4 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -330,12 +330,12 @@ static int expand_atom(struct strbuf *sb, const char *atom, int len, if (data->mark_query) data->info.sizep 
= &data->size; else - strbuf_addf(sb, "%"PRIuMAX , (uintmax_t)data->size); + strbuf_add_uint(sb, data->size); } else if (is_atom("objectsize:disk", atom, len)) { if (data->mark_query) data->info.disk_sizep = &data->disk_size; else - strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)data->disk_size); + strbuf_add_uint(sb, data->disk_size); } else if (is_atom("rest", atom, len)) { if (data->mark_query) data->split_on_whitespace = 1; From f001b4ab3942cbaff4a39662294ee7191e2dbee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Tue, 12 May 2026 13:56:02 +0200 Subject: [PATCH 3/4] ls-files: use strbuf_add_uint() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Speed up printing of objectsize values by using the specialized function strbuf_add_uint() as well as strbuf_insert() for padding instead of the general-purpose function strbuf_addf(). Here are the numbers I get when listing files in the Linux kernel repo: Benchmark 1: ./git_main -C ../linux ls-files --format='%(objectsize)' Time (mean ± σ): 257.3 ms ± 0.4 ms [User: 197.4 ms, System: 56.7 ms] Range (min … max): 256.7 ms … 258.1 ms 11 runs Benchmark 2: ./git -C ../linux ls-files --format='%(objectsize)' Time (mean ± σ): 253.4 ms ± 0.3 ms [User: 193.6 ms, System: 56.6 ms] Range (min … max): 253.0 ms … 253.8 ms 11 runs Benchmark 3: ./git_main -C ../linux ls-files --format='%(objectsize:padded)' Time (mean ± σ): 257.9 ms ± 0.3 ms [User: 198.0 ms, System: 56.6 ms] Range (min … max): 257.3 ms … 258.5 ms 11 runs Benchmark 4: ./git -C ../linux ls-files --format='%(objectsize:padded)' Time (mean ± σ): 254.6 ms ± 1.0 ms [User: 194.6 ms, System: 56.7 ms] Range (min … max): 253.7 ms … 256.8 ms 11 runs Summary ./git -C ../linux ls-files --format='%(objectsize)' ran 1.00 ± 0.00 times faster than ./git -C ../linux ls-files --format='%(objectsize:padded)' 1.02 ± 0.00 times faster than ./git_main -C ../linux ls-files --format='%(objectsize)' 1.02 ± 0.00 times faster than ./git_main 
-C ../linux ls-files --format='%(objectsize:padded)' Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index b148607f7a..c142ad4156 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -250,20 +250,23 @@ static void expand_objectsize(struct repository *repo, struct strbuf *line, const struct object_id *oid, const enum object_type type, unsigned int padded) { + static const char padding[] = "       "; + size_t min_len = padded ? strlen(padding) : 0; + size_t orig_len = line->len; + size_t len; + if (type == OBJ_BLOB) { unsigned long size; if (odb_read_object_info(repo->objects, oid, &size) < 0) die(_("could not get object info about '%s'"), oid_to_hex(oid)); - if (padded) - strbuf_addf(line, "%7"PRIuMAX, (uintmax_t)size); - else - strbuf_addf(line, "%"PRIuMAX, (uintmax_t)size); - } else if (padded) { - strbuf_addf(line, "%7s", "-"); + strbuf_add_uint(line, size); } else { strbuf_addstr(line, "-"); } + len = line->len - orig_len; + if (len < min_len) + strbuf_insert(line, orig_len, padding, min_len - len); } static void show_ce_fmt(struct repository *repo, const struct cache_entry *ce, From 4f87748b0d25bdc92b76e453f086204808e8be87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Tue, 12 May 2026 13:56:03 +0200 Subject: [PATCH 4/4] ls-tree: use strbuf_add_uint() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Speed up printing of objectsize values by using the specialized function strbuf_add_uint() as well as strbuf_insert() for padding instead of the general-purpose function strbuf_addf().
Here are the numbers I get when listing objects in the Linux kernel repo: Benchmark 1: ./git_main -C ../linux ls-tree -r --format='%(objectsize)' HEAD Time (mean ± σ): 294.4 ms ± 0.4 ms [User: 231.5 ms, System: 59.4 ms] Range (min … max): 293.9 ms … 295.0 ms 10 runs Benchmark 2: ./git -C ../linux ls-tree -r --format='%(objectsize)' HEAD Time (mean ± σ): 291.2 ms ± 0.4 ms [User: 227.9 ms, System: 62.1 ms] Range (min … max): 290.6 ms … 292.0 ms 10 runs Benchmark 3: ./git_main -C ../linux ls-tree -r --format='%(objectsize:padded)' HEAD Time (mean ± σ): 295.3 ms ± 0.6 ms [User: 232.0 ms, System: 59.6 ms] Range (min … max): 294.3 ms … 296.3 ms 10 runs Benchmark 4: ./git -C ../linux ls-tree -r --format='%(objectsize:padded)' HEAD Time (mean ± σ): 291.9 ms ± 0.4 ms [User: 228.5 ms, System: 61.5 ms] Range (min … max): 291.2 ms … 292.3 ms 10 runs Summary ./git -C ../linux ls-tree -r --format='%(objectsize)' HEAD ran 1.00 ± 0.00 times faster than ./git -C ../linux ls-tree -r --format='%(objectsize:padded)' HEAD 1.01 ± 0.00 times faster than ./git_main -C ../linux ls-tree -r --format='%(objectsize)' HEAD 1.01 ± 0.00 times faster than ./git_main -C ../linux ls-tree -r --format='%(objectsize:padded)' HEAD Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/ls-tree.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c index 113e4a960d..57846911ce 100644 --- a/builtin/ls-tree.c +++ b/builtin/ls-tree.c @@ -26,20 +26,23 @@ static const char * const ls_tree_usage[] = { static void expand_objectsize(struct strbuf *line, const struct object_id *oid, const enum object_type type, unsigned int padded) { + static const char padding[] = "       "; + size_t min_len = padded ?
strlen(padding) : 0; + size_t orig_len = line->len; + size_t len; + if (type == OBJ_BLOB) { unsigned long size; if (odb_read_object_info(the_repository->objects, oid, &size) < 0) die(_("could not get object info about '%s'"), oid_to_hex(oid)); - if (padded) - strbuf_addf(line, "%7"PRIuMAX, (uintmax_t)size); - else - strbuf_addf(line, "%"PRIuMAX, (uintmax_t)size); - } else if (padded) { - strbuf_addf(line, "%7s", "-"); + strbuf_add_uint(line, size); } else { strbuf_addstr(line, "-"); } + len = line->len - orig_len; + if (len < min_len) + strbuf_insert(line, orig_len, padding, min_len - len); } struct ls_tree_options {