From 6afedba8c97b5a8463e45ff801218433f8392d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Mon, 15 Oct 2018 00:01:48 +0000 Subject: [PATCH 01/15] object_id.cocci: match only expressions of type 'struct object_id' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Most of our semantic patches in 'contrib/coccinelle/object_id.cocci' turn calls of SHA1-specific functions into calls of their corresponding object_id counterparts, e.g. sha1_to_hex() to oid_to_hex(). These semantic patches look something like this: @@ expression E1; @@ - sha1_to_hex(E1.hash) + oid_to_hex(&E1) and match the access to the 'hash' field in any data type, not only in 'struct object_id', and, consquently, can produce wrong transformations. Case in point is the recent hash function transition patch "rerere: convert to use the_hash_algo" [1], which, among other things, renamed 'struct rerere_dir's 'sha1' field to 'hash', and then 'make coccicheck' started to suggest the following wrong transformations for 'rerere.c' [2]: - return sha1_to_hex(id->collection->hash); + return oid_to_hex(id->collection); and - DIR *dir = opendir(git_path("rr-cache/%s", sha1_to_hex(rr_dir->hash))); + DIR *dir = opendir(git_path("rr-cache/%s", oid_to_hex(rr_dir))); Avoid such wrong transformations by tightening semantic patches in 'object_id.cocci' to match only type of or pointers to 'struct object_id'. [1] https://public-inbox.org/git/20181008215701.779099-15-sandals@crustytoothpaste.net/ [2] https://travis-ci.org/git/git/jobs/440463476#L580 Signed-off-by: SZEDER Gábor Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- contrib/coccinelle/object_id.cocci | 117 ++++++++++++++++------------- 1 file changed, 63 insertions(+), 54 deletions(-) diff --git a/contrib/coccinelle/object_id.cocci b/contrib/coccinelle/object_id.cocci index d8bdb48712..6a7cf3e02d 100644 --- a/contrib/coccinelle/object_id.cocci +++ b/contrib/coccinelle/object_id.cocci @@ -1,119 +1,127 @@ @@ -expression E1; +struct object_id OID; @@ -- is_null_sha1(E1.hash) -+ is_null_oid(&E1) +- is_null_sha1(OID.hash) ++ is_null_oid(&OID) @@ -expression E1; +struct object_id *OIDPTR; @@ -- is_null_sha1(E1->hash) -+ is_null_oid(E1) +- is_null_sha1(OIDPTR->hash) ++ is_null_oid(OIDPTR) @@ -expression E1; +struct object_id OID; @@ -- sha1_to_hex(E1.hash) -+ oid_to_hex(&E1) +- sha1_to_hex(OID.hash) ++ oid_to_hex(&OID) @@ identifier f != oid_to_hex; -expression E1; +struct object_id *OIDPTR; @@ f(...) {<... -- sha1_to_hex(E1->hash) -+ oid_to_hex(E1) +- sha1_to_hex(OIDPTR->hash) ++ oid_to_hex(OIDPTR) ...>} @@ -expression E1, E2; +expression E; +struct object_id OID; @@ -- sha1_to_hex_r(E1, E2.hash) -+ oid_to_hex_r(E1, &E2) +- sha1_to_hex_r(E, OID.hash) ++ oid_to_hex_r(E, &OID) @@ identifier f != oid_to_hex_r; -expression E1, E2; +expression E; +struct object_id *OIDPTR; @@ f(...) {<... -- sha1_to_hex_r(E1, E2->hash) -+ oid_to_hex_r(E1, E2) +- sha1_to_hex_r(E, OIDPTR->hash) ++ oid_to_hex_r(E, OIDPTR) ...>} @@ -expression E1; +struct object_id OID; @@ -- hashclr(E1.hash) -+ oidclr(&E1) +- hashclr(OID.hash) ++ oidclr(&OID) @@ identifier f != oidclr; -expression E1; +struct object_id *OIDPTR; @@ f(...) {<... -- hashclr(E1->hash) -+ oidclr(E1) +- hashclr(OIDPTR->hash) ++ oidclr(OIDPTR) ...>} @@ -expression E1, E2; +struct object_id OID1, OID2; @@ -- hashcmp(E1.hash, E2.hash) -+ oidcmp(&E1, &E2) +- hashcmp(OID1.hash, OID2.hash) ++ oidcmp(&OID1, &OID2) @@ identifier f != oidcmp; -expression E1, E2; +struct object_id *OIDPTR1, OIDPTR2; @@ f(...) {<... -- hashcmp(E1->hash, E2->hash) -+ oidcmp(E1, E2) +- hashcmp(OIDPTR1->hash, OIDPTR2->hash) ++ oidcmp(OIDPTR1, OIDPTR2) ...>} @@ -expression E1, E2; +struct object_id *OIDPTR; +struct object_id OID; @@ -- hashcmp(E1->hash, E2.hash) -+ oidcmp(E1, &E2) +- hashcmp(OIDPTR->hash, OID.hash) ++ oidcmp(OIDPTR, &OID) @@ -expression E1, E2; +struct object_id *OIDPTR; +struct object_id OID; @@ -- hashcmp(E1.hash, E2->hash) -+ oidcmp(&E1, E2) +- hashcmp(OID.hash, OIDPTR->hash) ++ oidcmp(&OID, OIDPTR) @@ -expression E1, E2; +struct object_id OID1, OID2; @@ -- hashcpy(E1.hash, E2.hash) -+ oidcpy(&E1, &E2) +- hashcpy(OID1.hash, OID2.hash) ++ oidcpy(&OID1, &OID2) @@ identifier f != oidcpy; -expression E1, E2; +struct object_id *OIDPTR1; +struct object_id *OIDPTR2; @@ f(...) {<... -- hashcpy(E1->hash, E2->hash) -+ oidcpy(E1, E2) +- hashcpy(OIDPTR1->hash, OIDPTR2->hash) ++ oidcpy(OIDPTR1, OIDPTR2) ...>} @@ -expression E1, E2; +struct object_id *OIDPTR; +struct object_id OID; @@ -- hashcpy(E1->hash, E2.hash) -+ oidcpy(E1, &E2) +- hashcpy(OIDPTR->hash, OID.hash) ++ oidcpy(OIDPTR, &OID) @@ -expression E1, E2; +struct object_id *OIDPTR; +struct object_id OID; @@ -- hashcpy(E1.hash, E2->hash) -+ oidcpy(&E1, E2) +- hashcpy(OID.hash, OIDPTR->hash) ++ oidcpy(&OID, OIDPTR) @@ -expression E1, E2; +struct object_id *OIDPTR1; +struct object_id *OIDPTR2; @@ -- oidcmp(E1, E2) == 0 -+ oideq(E1, E2) +- oidcmp(OIDPTR1, OIDPTR2) == 0 ++ oideq(OIDPTR1, OIDPTR2) @@ identifier f != hasheq; @@ -125,10 +133,11 @@ expression E1, E2; ...>} @@ -expression E1, E2; +struct object_id *OIDPTR1; +struct object_id *OIDPTR2; @@ -- oidcmp(E1, E2) != 0 -+ !oideq(E1, E2) +- oidcmp(OIDPTR1, OIDPTR2) != 0 ++ !oideq(OIDPTR1, OIDPTR2) @@ identifier f != hasheq; From 825544a3511c679ea1b51ffaf1cd565e8ed035ec Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:49 +0000 Subject: [PATCH 02/15] pack-bitmap-write: use GIT_MAX_RAWSZ for allocation Reviewed-by: Stefan Beller Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- pack-bitmap-write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index fc82f37a02..6f0c78d6aa 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -37,7 +37,7 @@ struct bitmap_writer { struct progress *progress; int show_progress; - unsigned char pack_checksum[20]; + unsigned char pack_checksum[GIT_MAX_RAWSZ]; }; static struct bitmap_writer writer; From 2f0c9e9a9bbe8015da2c0838f3051c0a336dee26 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:50 +0000 Subject: [PATCH 03/15] builtin/repack: replace hard-coded constants Note that while the error messages here are not translated, the end user should never see them. We invoke git pack-objects shortly before both invocations, so we can be fairly certain that the data we're receiving is in fact valid. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index c6a7943d5c..0223f2880c 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -235,8 +235,8 @@ static void repack_promisor_objects(const struct pack_objects_args *args, while (strbuf_getline_lf(&line, out) != EOF) { char *promisor_name; int fd; - if (line.len != 40) - die("repack: Expecting 40 character sha1 lines only from pack-objects."); + if (line.len != the_hash_algo->hexsz) + die("repack: Expecting full hex object ID lines only from pack-objects."); string_list_append(names, line.buf); /* @@ -407,8 +407,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix) out = xfdopen(cmd.out, "r"); while (strbuf_getline_lf(&line, out) != EOF) { - if (line.len != 40) - die("repack: Expecting 40 character sha1 lines only from pack-objects."); + if (line.len != the_hash_algo->hexsz) + die("repack: Expecting full hex object ID lines only from pack-objects."); string_list_append(&names, line.buf); } fclose(out); @@ -535,14 +535,15 @@ int cmd_repack(int argc, const char **argv, const char *prefix) reprepare_packed_git(the_repository); if (delete_redundant) { + const int hexsz = the_hash_algo->hexsz; int opts = 0; string_list_sort(&names); for_each_string_list_item(item, &existing_packs) { char *sha1; size_t len = strlen(item->string); - if (len < 40) + if (len < hexsz) continue; - sha1 = item->string + len - 40; + sha1 = item->string + len - hexsz; if (!string_list_has_string(&names, sha1)) remove_redundant_pack(packdir, item->string); } From 58ce21b819e582db2a96d170fdd24d3f7bc6c4d0 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:51 +0000 Subject: [PATCH 04/15] builtin/mktree: remove hard-coded constant Instead of using a hard-coded constant for the size of a hex object ID, switch to use the computed pointer from parse_oid_hex that points after the parsed object ID. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/mktree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/mktree.c b/builtin/mktree.c index 2dc4ad6ba8..94e82b8504 100644 --- a/builtin/mktree.c +++ b/builtin/mktree.c @@ -98,7 +98,7 @@ static void mktree_line(char *buf, size_t len, int nul_term_line, int allow_miss *ntr++ = 0; /* now at the beginning of SHA1 */ - path = ntr + 41; /* at the beginning of name */ + path = (char *)p + 1; /* at the beginning of name */ if (!nul_term_line && path[0] == '"') { struct strbuf p_uq = STRBUF_INIT; if (unquote_c_style(&p_uq, path, NULL)) From 7b5e614e2acc2f4e6b2f1ea4eb93dd430d350abd Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:52 +0000 Subject: [PATCH 05/15] builtin/fetch-pack: remove constants with parse_oid_hex Instead of using GIT_SHA1_HEXSZ, use parse_oid_hex to compute a pointer and use that in comparisons. This is both simpler to read and works independent of the hash length. Update references to SHA-1 in the same function to refer to object IDs instead. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 1a1bc63566..63e69a5801 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -16,13 +16,14 @@ static void add_sought_entry(struct ref ***sought, int *nr, int *alloc, { struct ref *ref; struct object_id oid; + const char *p; - if (!get_oid_hex(name, &oid)) { - if (name[GIT_SHA1_HEXSZ] == ' ') { - /* , find refname */ - name += GIT_SHA1_HEXSZ + 1; - } else if (name[GIT_SHA1_HEXSZ] == '\0') { - ; /* , leave sha1 as name */ + if (!parse_oid_hex(name, &oid, &p)) { + if (*p == ' ') { + /* , find refname */ + name = p + 1; + } else if (*p == '\0') { + ; /* , leave oid as name */ } else { /* , clear cruft from oid */ oidclr(&oid); From fa130802d9ccde50e4097b2b1c5324990be3ffb4 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:53 +0000 Subject: [PATCH 06/15] pack-revindex: express constants in terms of the_hash_algo Express the various constants used in terms of the_hash_algo. While we're at it, fix a comment style issue as well. Reviewed-by: Stefan Beller Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- pack-revindex.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pack-revindex.c b/pack-revindex.c index bb521cf7fb..3c58784a5f 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -122,13 +122,14 @@ static void create_pack_revindex(struct packed_git *p) unsigned num_ent = p->num_objects; unsigned i; const char *index = p->index_data; + const unsigned hashsz = the_hash_algo->rawsz; ALLOC_ARRAY(p->revindex, num_ent + 1); index += 4 * 256; if (p->index_version > 1) { const uint32_t *off_32 = - (uint32_t *)(index + 8 + p->num_objects * (20 + 4)); + (uint32_t *)(index + 8 + p->num_objects * (hashsz + 4)); const uint32_t *off_64 = off_32 + p->num_objects; for (i = 0; i < num_ent; i++) { uint32_t off = ntohl(*off_32++); @@ -142,16 +143,17 @@ static void create_pack_revindex(struct packed_git *p) } } else { for (i = 0; i < num_ent; i++) { - uint32_t hl = *((uint32_t *)(index + 24 * i)); + uint32_t hl = *((uint32_t *)(index + (hashsz + 4) * i)); p->revindex[i].offset = ntohl(hl); p->revindex[i].nr = i; } } - /* This knows the pack format -- the 20-byte trailer + /* + * This knows the pack format -- the hash trailer * follows immediately after the last object data. */ - p->revindex[num_ent].offset = p->pack_size - 20; + p->revindex[num_ent].offset = p->pack_size - hashsz; p->revindex[num_ent].nr = -1; sort_revindex(p->revindex, num_ent, p->pack_size); } From 268babd6fb06cfb897d48f7a9adb4828f5f6b45f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:54 +0000 Subject: [PATCH 07/15] packfile: express constants in terms of the_hash_algo Replace uses of GIT_SHA1_RAWSZ with references to the_hash_algo to avoid dependence on a particular hash length. It's likely that in the future, we'll update the pack format to indicate what hash algorithm it uses, and then this code will change. However, at least on an interim basis, make it easier to develop on a pure SHA-256 Git by using the_hash_algo here. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- packfile.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packfile.c b/packfile.c index 841b36182f..17f993b5bf 100644 --- a/packfile.c +++ b/packfile.c @@ -1121,13 +1121,14 @@ int unpack_object_header(struct packed_git *p, void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1) { unsigned i; + const unsigned hashsz = the_hash_algo->rawsz; for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i)) + if (hasheq(sha1, p->bad_object_sha1 + hashsz * i)) return; p->bad_object_sha1 = xrealloc(p->bad_object_sha1, st_mult(GIT_MAX_RAWSZ, st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1); + hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, sha1); p->num_bad_objects++; } From 49d166081bb5ddefa57fd97fd517449264ee5246 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:55 +0000 Subject: [PATCH 08/15] refs/packed-backend: express constants using the_hash_algo Switch uses of GIT_SHA1_HEXSZ to use the_hash_algo so that they are appropriate for the any given hash length. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- refs/packed-backend.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/refs/packed-backend.c b/refs/packed-backend.c index 74e2996e93..c01c7f5901 100644 --- a/refs/packed-backend.c +++ b/refs/packed-backend.c @@ -274,8 +274,8 @@ struct snapshot_record { static int cmp_packed_ref_records(const void *v1, const void *v2) { const struct snapshot_record *e1 = v1, *e2 = v2; - const char *r1 = e1->start + GIT_SHA1_HEXSZ + 1; - const char *r2 = e2->start + GIT_SHA1_HEXSZ + 1; + const char *r1 = e1->start + the_hash_algo->hexsz + 1; + const char *r2 = e2->start + the_hash_algo->hexsz + 1; while (1) { if (*r1 == '\n') @@ -297,7 +297,7 @@ static int cmp_packed_ref_records(const void *v1, const void *v2) */ static int cmp_record_to_refname(const char *rec, const char *refname) { - const char *r1 = rec + GIT_SHA1_HEXSZ + 1; + const char *r1 = rec + the_hash_algo->hexsz + 1; const char *r2 = refname; while (1) { @@ -344,7 +344,7 @@ static void sort_snapshot(struct snapshot *snapshot) if (!eol) /* The safety check should prevent this. */ BUG("unterminated line found in packed-refs"); - if (eol - pos < GIT_SHA1_HEXSZ + 2) + if (eol - pos < the_hash_algo->hexsz + 2) die_invalid_line(snapshot->refs->path, pos, eof - pos); eol++; @@ -456,7 +456,7 @@ static void verify_buffer_safe(struct snapshot *snapshot) return; last_line = find_start_of_record(start, eof - 1); - if (*(eof - 1) != '\n' || eof - last_line < GIT_SHA1_HEXSZ + 2) + if (*(eof - 1) != '\n' || eof - last_line < the_hash_algo->hexsz + 2) die_invalid_line(snapshot->refs->path, last_line, eof - last_line); } @@ -796,7 +796,7 @@ static int next_record(struct packed_ref_iterator *iter) iter->base.flags = REF_ISPACKED; - if (iter->eof - p < GIT_SHA1_HEXSZ + 2 || + if (iter->eof - p < the_hash_algo->hexsz + 2 || parse_oid_hex(p, &iter->oid, &p) || !isspace(*p++)) die_invalid_line(iter->snapshot->refs->path, @@ -826,7 +826,7 @@ static int next_record(struct packed_ref_iterator *iter) if (iter->pos < iter->eof && *iter->pos == '^') { p = iter->pos + 1; - if (iter->eof - p < GIT_SHA1_HEXSZ + 1 || + if (iter->eof - p < the_hash_algo->hexsz + 1 || parse_oid_hex(p, &iter->peeled, &p) || *p++ != '\n') die_invalid_line(iter->snapshot->refs->path, From f690b6b03049e8f564aa0a1abbde5ec0731c5774 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:56 +0000 Subject: [PATCH 09/15] upload-pack: express constants in terms of the_hash_algo Convert all uses of the GIT_SHA1_HEXSZ to use the_hash_algo so that they are appropriate for any given hash length. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- upload-pack.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 62a1000f44..1aae5dd828 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -443,6 +443,7 @@ static int do_reachable_revlist(struct child_process *cmd, struct object *o; char namebuf[GIT_MAX_HEXSZ + 2]; /* ^ + hash + LF */ int i; + const unsigned hexsz = the_hash_algo->hexsz; cmd->argv = argv; cmd->git_cmd = 1; @@ -461,7 +462,7 @@ static int do_reachable_revlist(struct child_process *cmd, goto error; namebuf[0] = '^'; - namebuf[GIT_SHA1_HEXSZ + 1] = '\n'; + namebuf[hexsz + 1] = '\n'; for (i = get_max_object_index(); 0 < i; ) { o = get_indexed_object(--i); if (!o) @@ -470,11 +471,11 @@ static int do_reachable_revlist(struct child_process *cmd, o->flags &= ~TMP_MARK; if (!is_our_ref(o)) continue; - memcpy(namebuf + 1, oid_to_hex(&o->oid), GIT_SHA1_HEXSZ); - if (write_in_full(cmd->in, namebuf, GIT_SHA1_HEXSZ + 2) < 0) + memcpy(namebuf + 1, oid_to_hex(&o->oid), hexsz); + if (write_in_full(cmd->in, namebuf, hexsz + 2) < 0) goto error; } - namebuf[GIT_SHA1_HEXSZ] = '\n'; + namebuf[hexsz] = '\n'; for (i = 0; i < src->nr; i++) { o = src->objects[i].item; if (is_our_ref(o)) { @@ -484,8 +485,8 @@ static int do_reachable_revlist(struct child_process *cmd, } if (reachable && o->type == OBJ_COMMIT) o->flags |= TMP_MARK; - memcpy(namebuf, oid_to_hex(&o->oid), GIT_SHA1_HEXSZ); - if (write_in_full(cmd->in, namebuf, GIT_SHA1_HEXSZ + 1) < 0) + memcpy(namebuf, oid_to_hex(&o->oid), hexsz); + if (write_in_full(cmd->in, namebuf, hexsz + 1) < 0) goto error; } close(cmd->in); From fbd0e37cde633221d5b3ca239f26cb7d005da5c8 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:57 +0000 Subject: [PATCH 10/15] transport: use parse_oid_hex instead of a constant Use parse_oid_hex to compute a pointer instead of using GIT_SHA1_HEXSZ. This simplifies the code and makes it independent of the hash length. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- transport.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/transport.c b/transport.c index 1c76d64aba..44b9ddf670 100644 --- a/transport.c +++ b/transport.c @@ -1346,15 +1346,16 @@ static void read_alternate_refs(const char *path, fh = xfdopen(cmd.out, "r"); while (strbuf_getline_lf(&line, fh) != EOF) { struct object_id oid; + const char *p; - if (get_oid_hex(line.buf, &oid) || - line.buf[GIT_SHA1_HEXSZ] != ' ') { + if (parse_oid_hex(line.buf, &oid, &p) || + *p != ' ') { warning(_("invalid line while parsing alternate refs: %s"), line.buf); break; } - cb(line.buf + GIT_SHA1_HEXSZ + 1, &oid, data); + cb(p + 1, &oid, data); } fclose(fh); From d8a3a690204ec8583b536e562d46632cae5cfe03 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:58 +0000 Subject: [PATCH 11/15] tag: express constant in terms of the_hash_algo Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- tag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tag.c b/tag.c index 1db663d716..7445b8f6ea 100644 --- a/tag.c +++ b/tag.c @@ -144,7 +144,7 @@ int parse_tag_buffer(struct repository *r, struct tag *item, const void *data, u return 0; item->object.parsed = 1; - if (size < GIT_SHA1_HEXSZ + 24) + if (size < the_hash_algo->hexsz + 24) return -1; if (memcmp("object ", bufptr, 7) || parse_oid_hex(bufptr + 7, &oid, &bufptr) || *bufptr++ != '\n') return -1; From 93eb00f719a2d36d31a4ba7b6eeae9f6302314e9 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:01:59 +0000 Subject: [PATCH 12/15] apply: replace hard-coded constants Replace several 40-based constants with references to GIT_MAX_HEXSZ or the_hash_algo, as appropriate. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- apply.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/apply.c b/apply.c index e485fbc6bc..792ecea36a 100644 --- a/apply.c +++ b/apply.c @@ -223,8 +223,8 @@ struct patch { struct fragment *fragments; char *result; size_t resultsize; - char old_sha1_prefix[41]; - char new_sha1_prefix[41]; + char old_sha1_prefix[GIT_MAX_HEXSZ + 1]; + char new_sha1_prefix[GIT_MAX_HEXSZ + 1]; struct patch *next; /* three-way fallback result */ @@ -1093,9 +1093,10 @@ static int gitdiff_index(struct apply_state *state, */ const char *ptr, *eol; int len; + const unsigned hexsz = the_hash_algo->hexsz; ptr = strchr(line, '.'); - if (!ptr || ptr[1] != '.' || 40 < ptr - line) + if (!ptr || ptr[1] != '.' || hexsz < ptr - line) return 0; len = ptr - line; memcpy(patch->old_sha1_prefix, line, len); @@ -1109,7 +1110,7 @@ static int gitdiff_index(struct apply_state *state, ptr = eol; len = ptr - line; - if (40 < len) + if (hexsz < len) return 0; memcpy(patch->new_sha1_prefix, line, len); patch->new_sha1_prefix[len] = 0; @@ -3142,13 +3143,14 @@ static int apply_binary(struct apply_state *state, { const char *name = patch->old_name ? patch->old_name : patch->new_name; struct object_id oid; + const unsigned hexsz = the_hash_algo->hexsz; /* * For safety, we require patch index line to contain - * full 40-byte textual SHA1 for old and new, at least for now. + * full hex textual object ID for old and new, at least for now. */ - if (strlen(patch->old_sha1_prefix) != 40 || - strlen(patch->new_sha1_prefix) != 40 || + if (strlen(patch->old_sha1_prefix) != hexsz || + strlen(patch->new_sha1_prefix) != hexsz || get_oid_hex(patch->old_sha1_prefix, &oid) || get_oid_hex(patch->new_sha1_prefix, &oid)) return error(_("cannot apply binary patch to '%s' " @@ -4055,7 +4057,7 @@ static int preimage_oid_in_gitlink_patch(struct patch *p, struct object_id *oid) starts_with(++preimage, heading) && /* does it record full SHA-1? */ !get_oid_hex(preimage + sizeof(heading) - 1, oid) && - preimage[sizeof(heading) + GIT_SHA1_HEXSZ - 1] == '\n' && + preimage[sizeof(heading) + the_hash_algo->hexsz - 1] == '\n' && /* does the abbreviated name on the index line agree with it? */ starts_with(preimage + sizeof(heading) - 1, p->old_sha1_prefix)) return 0; /* it all looks fine */ From eccb5a5f3d8d49ec9b4645aac94bffb7e504f10f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:02:00 +0000 Subject: [PATCH 13/15] apply: rename new_sha1_prefix and old_sha1_prefix Rename these structure members to "new_oid_prefix" and "old_oid_prefix". Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- apply.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/apply.c b/apply.c index 792ecea36a..b9eb02ec12 100644 --- a/apply.c +++ b/apply.c @@ -223,8 +223,8 @@ struct patch { struct fragment *fragments; char *result; size_t resultsize; - char old_sha1_prefix[GIT_MAX_HEXSZ + 1]; - char new_sha1_prefix[GIT_MAX_HEXSZ + 1]; + char old_oid_prefix[GIT_MAX_HEXSZ + 1]; + char new_oid_prefix[GIT_MAX_HEXSZ + 1]; struct patch *next; /* three-way fallback result */ @@ -1099,8 +1099,8 @@ static int gitdiff_index(struct apply_state *state, if (!ptr || ptr[1] != '.' || hexsz < ptr - line) return 0; len = ptr - line; - memcpy(patch->old_sha1_prefix, line, len); - patch->old_sha1_prefix[len] = 0; + memcpy(patch->old_oid_prefix, line, len); + patch->old_oid_prefix[len] = 0; line = ptr + 2; ptr = strchr(line, ' '); @@ -1112,8 +1112,8 @@ static int gitdiff_index(struct apply_state *state, if (hexsz < len) return 0; - memcpy(patch->new_sha1_prefix, line, len); - patch->new_sha1_prefix[len] = 0; + memcpy(patch->new_oid_prefix, line, len); + patch->new_oid_prefix[len] = 0; if (*ptr == ' ') return gitdiff_oldmode(state, ptr + 1, patch); return 0; @@ -2205,7 +2205,7 @@ static void reverse_patches(struct patch *p) SWAP(p->new_mode, p->old_mode); SWAP(p->is_new, p->is_delete); SWAP(p->lines_added, p->lines_deleted); - SWAP(p->old_sha1_prefix, p->new_sha1_prefix); + SWAP(p->old_oid_prefix, p->new_oid_prefix); for (; frag; frag = frag->next) { SWAP(frag->newpos, frag->oldpos); @@ -3149,10 +3149,10 @@ static int apply_binary(struct apply_state *state, * For safety, we require patch index line to contain * full hex textual object ID for old and new, at least for now. */ - if (strlen(patch->old_sha1_prefix) != hexsz || - strlen(patch->new_sha1_prefix) != hexsz || - get_oid_hex(patch->old_sha1_prefix, &oid) || - get_oid_hex(patch->new_sha1_prefix, &oid)) + if (strlen(patch->old_oid_prefix) != hexsz || + strlen(patch->new_oid_prefix) != hexsz || + get_oid_hex(patch->old_oid_prefix, &oid) || + get_oid_hex(patch->new_oid_prefix, &oid)) return error(_("cannot apply binary patch to '%s' " "without full index line"), name); @@ -3162,7 +3162,7 @@ static int apply_binary(struct apply_state *state, * applies to. */ hash_object_file(img->buf, img->len, blob_type, &oid); - if (strcmp(oid_to_hex(&oid), patch->old_sha1_prefix)) + if (strcmp(oid_to_hex(&oid), patch->old_oid_prefix)) return error(_("the patch applies to '%s' (%s), " "which does not match the " "current contents."), @@ -3175,7 +3175,7 @@ static int apply_binary(struct apply_state *state, "'%s' but it is not empty"), name); } - get_oid_hex(patch->new_sha1_prefix, &oid); + get_oid_hex(patch->new_oid_prefix, &oid); if (is_null_oid(&oid)) { clear_image(img); return 0; /* deletion patch */ @@ -3191,7 +3191,7 @@ static int apply_binary(struct apply_state *state, if (!result) return error(_("the necessary postimage %s for " "'%s' cannot be read"), - patch->new_sha1_prefix, name); + patch->new_oid_prefix, name); clear_image(img); img->buf = result; img->len = size; @@ -3207,9 +3207,9 @@ static int apply_binary(struct apply_state *state, /* verify that the result matches */ hash_object_file(img->buf, img->len, blob_type, &oid); - if (strcmp(oid_to_hex(&oid), patch->new_sha1_prefix)) + if (strcmp(oid_to_hex(&oid), patch->new_oid_prefix)) return error(_("binary patch to '%s' creates incorrect result (expecting %s, got %s)"), - name, patch->new_sha1_prefix, oid_to_hex(&oid)); + name, patch->new_oid_prefix, oid_to_hex(&oid)); } return 0; @@ -3565,7 +3565,7 @@ static int try_threeway(struct apply_state *state, /* Preimage the patch was prepared for */ if (patch->is_new) write_object_file("", 0, blob_type, &pre_oid); - else if (get_oid(patch->old_sha1_prefix, &pre_oid) || + else if (get_oid(patch->old_oid_prefix, &pre_oid) || read_blob_object(&buf, &pre_oid, patch->old_mode)) return error(_("repository lacks the necessary blob to fall back on 3-way merge.")); @@ -4059,11 +4059,11 @@ static int preimage_oid_in_gitlink_patch(struct patch *p, struct object_id *oid) !get_oid_hex(preimage + sizeof(heading) - 1, oid) && preimage[sizeof(heading) + the_hash_algo->hexsz - 1] == '\n' && /* does the abbreviated name on the index line agree with it? */ - starts_with(preimage + sizeof(heading) - 1, p->old_sha1_prefix)) + starts_with(preimage + sizeof(heading) - 1, p->old_oid_prefix)) return 0; /* it all looks fine */ /* we may have full object name on the index line */ - return get_oid_hex(p->old_sha1_prefix, oid); + return get_oid_hex(p->old_oid_prefix, oid); } /* Build an index that contains just the files needed for a 3way merge */ @@ -4092,7 +4092,7 @@ static int build_fake_ancestor(struct apply_state *state, struct patch *list) else return error(_("sha1 information is lacking or " "useless for submodule %s"), name); - } else if (!get_oid_blob(patch->old_sha1_prefix, &oid)) { + } else if (!get_oid_blob(patch->old_oid_prefix, &oid)) { ; /* ok */ } else if (!patch->lines_added && !patch->lines_deleted) { /* mode-only change: update the current */ From dda634687717fd8aafd54cae9ae3d9b2902e276e Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:02:01 +0000 Subject: [PATCH 14/15] submodule: make zero-oid comparison hash function agnostic With SHA-256, the length of the all-zeros object ID is longer. Add a function to git-submodule.sh to check if a full hex object ID is the all-zeros value, and use it to check the output we're parsing from git diff-files or diff-index. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- git-submodule.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/git-submodule.sh b/git-submodule.sh index 1b568e29b9..c09eb3e03d 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -82,6 +82,11 @@ isnumber() n=$(($1 + 0)) 2>/dev/null && test "$n" = "$1" } +# Given a full hex object ID, is this the zero OID? +is_zero_oid () { + echo "$1" | sane_egrep '^0+$' >/dev/null 2>&1 +} + # Sanitize the local git environment for use within a submodule. We # can't simply use clear_local_git_env since we want to preserve some # of the settings from GIT_CONFIG_PARAMETERS. @@ -780,7 +785,7 @@ cmd_summary() { while read -r mod_src mod_dst sha1_src sha1_dst status name do if test -z "$cached" && - test $sha1_dst = 0000000000000000000000000000000000000000 + is_zero_oid $sha1_dst then case "$mod_dst" in 160000) From 0d7c419a94b7524ac854d5a6002b7541abab4f12 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 15 Oct 2018 00:02:02 +0000 Subject: [PATCH 15/15] rerere: convert to use the_hash_algo Since this data is stored in the .git directory, it makes sense for us to use the same hash algorithm for it as for everything else. Convert the remaining uses of SHA-1 to use the_hash_algo. Use GIT_MAX_RAWSZ for allocations. Rename various struct members, local variables, and a function to be named "hash" instead of "sha1". Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- rerere.c | 81 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/rerere.c b/rerere.c index 7aa149e849..ceb98015ff 100644 --- a/rerere.c +++ b/rerere.c @@ -29,7 +29,7 @@ static int rerere_dir_alloc; #define RR_HAS_POSTIMAGE 1 #define RR_HAS_PREIMAGE 2 static struct rerere_dir { - unsigned char sha1[20]; + unsigned char hash[GIT_MAX_HEXSZ]; int status_alloc, status_nr; unsigned char *status; } **rerere_dir; @@ -52,7 +52,7 @@ static void free_rerere_id(struct string_list_item *item) static const char *rerere_id_hex(const struct rerere_id *id) { - return sha1_to_hex(id->collection->sha1); + return sha1_to_hex(id->collection->hash); } static void fit_variant(struct rerere_dir *rr_dir, int variant) @@ -115,7 +115,7 @@ static int is_rr_file(const char *name, const char *filename, int *variant) static void scan_rerere_dir(struct rerere_dir *rr_dir) { struct dirent *de; - DIR *dir = opendir(git_path("rr-cache/%s", sha1_to_hex(rr_dir->sha1))); + DIR *dir = opendir(git_path("rr-cache/%s", sha1_to_hex(rr_dir->hash))); if (!dir) return; @@ -133,24 +133,24 @@ static void scan_rerere_dir(struct rerere_dir *rr_dir) closedir(dir); } -static const unsigned char *rerere_dir_sha1(size_t i, void *table) +static const unsigned char *rerere_dir_hash(size_t i, void *table) { struct rerere_dir **rr_dir = table; - return rr_dir[i]->sha1; + return rr_dir[i]->hash; } static struct rerere_dir *find_rerere_dir(const char *hex) { - unsigned char sha1[20]; + unsigned char hash[GIT_MAX_RAWSZ]; struct rerere_dir *rr_dir; int pos; - if (get_sha1_hex(hex, sha1)) + if (get_sha1_hex(hex, hash)) return NULL; /* BUG */ - pos = sha1_pos(sha1, rerere_dir, rerere_dir_nr, rerere_dir_sha1); + pos = sha1_pos(hash, rerere_dir, rerere_dir_nr, rerere_dir_hash); if (pos < 0) { rr_dir = xmalloc(sizeof(*rr_dir)); - hashcpy(rr_dir->sha1, sha1); + hashcpy(rr_dir->hash, hash); rr_dir->status = NULL; rr_dir->status_nr = 0; rr_dir->status_alloc = 0; @@ -207,26 +207,27 @@ static void read_rr(struct string_list *rr) return; while (!strbuf_getwholeline(&buf, in, '\0')) { char *path; - unsigned char sha1[20]; + unsigned char hash[GIT_MAX_RAWSZ]; struct rerere_id *id; int variant; + const unsigned hexsz = the_hash_algo->hexsz; /* There has to be the hash, tab, path and then NUL */ - if (buf.len < 42 || get_sha1_hex(buf.buf, sha1)) + if (buf.len < hexsz + 2 || get_sha1_hex(buf.buf, hash)) die(_("corrupt MERGE_RR")); - if (buf.buf[40] != '.') { + if (buf.buf[hexsz] != '.') { variant = 0; - path = buf.buf + 40; + path = buf.buf + hexsz; } else { errno = 0; - variant = strtol(buf.buf + 41, &path, 10); + variant = strtol(buf.buf + hexsz + 1, &path, 10); if (errno) die(_("corrupt MERGE_RR")); } if (*(path++) != '\t') die(_("corrupt MERGE_RR")); - buf.buf[40] = '\0'; + buf.buf[hexsz] = '\0'; id = new_rerere_id_hex(buf.buf); id->variant = variant; string_list_insert(rr, path)->util = id; @@ -360,7 +361,7 @@ static void rerere_strbuf_putconflict(struct strbuf *buf, int ch, size_t size) } static int handle_conflict(struct strbuf *out, struct rerere_io *io, - int marker_size, git_SHA_CTX *ctx) + int marker_size, git_hash_ctx *ctx) { enum { RR_SIDE_1 = 0, RR_SIDE_2, RR_ORIGINAL @@ -398,10 +399,12 @@ static int handle_conflict(struct strbuf *out, struct rerere_io *io, strbuf_addbuf(out, &two); rerere_strbuf_putconflict(out, '>', marker_size); if (ctx) { - git_SHA1_Update(ctx, one.buf ? one.buf : "", - one.len + 1); - git_SHA1_Update(ctx, two.buf ? two.buf : "", - two.len + 1); + the_hash_algo->update_fn(ctx, one.buf ? + one.buf : "", + one.len + 1); + the_hash_algo->update_fn(ctx, two.buf ? + two.buf : "", + two.len + 1); } break; } else if (hunk == RR_SIDE_1) @@ -430,18 +433,18 @@ static int handle_conflict(struct strbuf *out, struct rerere_io *io, * Return 1 if conflict hunks are found, 0 if there are no conflict * hunks and -1 if an error occured. */ -static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_size) +static int handle_path(unsigned char *hash, struct rerere_io *io, int marker_size) { - git_SHA_CTX ctx; + git_hash_ctx ctx; struct strbuf buf = STRBUF_INIT, out = STRBUF_INIT; int has_conflicts = 0; - if (sha1) - git_SHA1_Init(&ctx); + if (hash) + the_hash_algo->init_fn(&ctx); while (!io->getline(&buf, io)) { if (is_cmarker(buf.buf, '<', marker_size)) { has_conflicts = handle_conflict(&out, io, marker_size, - sha1 ? &ctx : NULL); + hash ? &ctx : NULL); if (has_conflicts < 0) break; rerere_io_putmem(out.buf, out.len, io); @@ -452,8 +455,8 @@ static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_siz strbuf_release(&buf); strbuf_release(&out); - if (sha1) - git_SHA1_Final(sha1, &ctx); + if (hash) + the_hash_algo->final_fn(hash, &ctx); return has_conflicts; } @@ -462,7 +465,7 @@ static int handle_path(unsigned char *sha1, struct rerere_io *io, int marker_siz * Scan the path for conflicts, do the "handle_path()" thing above, and * return the number of conflict hunks found. */ -static int handle_file(const char *path, unsigned char *sha1, const char *output) +static int handle_file(const char *path, unsigned char *hash, const char *output) { int has_conflicts = 0; struct rerere_io_file io; @@ -484,7 +487,7 @@ static int handle_file(const char *path, unsigned char *sha1, const char *output } } - has_conflicts = handle_path(sha1, (struct rerere_io *)&io, marker_size); + has_conflicts = handle_path(hash, (struct rerere_io *)&io, marker_size); fclose(io.input); if (io.io.wrerror) @@ -814,7 +817,7 @@ static int do_plain_rerere(struct string_list *rr, int fd) */ for (i = 0; i < conflict.nr; i++) { struct rerere_id *id; - unsigned char sha1[20]; + unsigned char hash[GIT_MAX_RAWSZ]; const char *path = conflict.items[i].string; int ret; @@ -823,7 +826,7 @@ static int do_plain_rerere(struct string_list *rr, int fd) * conflict ID. No need to write anything out * yet. */ - ret = handle_file(path, sha1, NULL); + ret = handle_file(path, hash, NULL); if (ret != 0 && string_list_has_string(rr, path)) { remove_variant(string_list_lookup(rr, path)->util); string_list_remove(rr, path, 1); @@ -831,7 +834,7 @@ static int do_plain_rerere(struct string_list *rr, int fd) if (ret < 1) continue; - id = new_rerere_id(sha1); + id = new_rerere_id(hash); string_list_insert(rr, path)->util = id; /* Ensure that the directory exists. */ @@ -942,7 +945,7 @@ static int rerere_mem_getline(struct strbuf *sb, struct rerere_io *io_) return 0; } -static int handle_cache(const char *path, unsigned char *sha1, const char *output) +static int handle_cache(const char *path, unsigned char *hash, const char *output) { mmfile_t mmfile[3] = {{NULL}}; mmbuffer_t result = {NULL, 0}; @@ -1001,7 +1004,7 @@ static int handle_cache(const char *path, unsigned char *sha1, const char *outpu * Grab the conflict ID and optionally write the original * contents with conflict markers out. */ - has_conflicts = handle_path(sha1, (struct rerere_io *)&io, marker_size); + has_conflicts = handle_path(hash, (struct rerere_io *)&io, marker_size); strbuf_release(&io.input); if (io.io.output) fclose(io.io.output); @@ -1012,7 +1015,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr) { const char *filename; struct rerere_id *id; - unsigned char sha1[20]; + unsigned char hash[GIT_MAX_RAWSZ]; int ret; struct string_list_item *item; @@ -1020,12 +1023,12 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr) * Recreate the original conflict from the stages in the * index and compute the conflict ID */ - ret = handle_cache(path, sha1, NULL); + ret = handle_cache(path, hash, NULL); if (ret < 1) return error(_("could not parse conflict hunks in '%s'"), path); /* Nuke the recorded resolution for the conflict */ - id = new_rerere_id(sha1); + id = new_rerere_id(hash); for (id->variant = 0; id->variant < id->collection->status_nr; @@ -1037,7 +1040,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr) if (!has_rerere_resolution(id)) continue; - handle_cache(path, sha1, rerere_path(id, "thisimage")); + handle_cache(path, hash, rerere_path(id, "thisimage")); if (read_mmfile(&cur, rerere_path(id, "thisimage"))) { free(cur.ptr); error(_("failed to update conflicted state in '%s'"), path); @@ -1069,7 +1072,7 @@ static int rerere_forget_one_path(const char *path, struct string_list *rr) * conflict in the working tree, run us again to record * the postimage. */ - handle_cache(path, sha1, rerere_path(id, "preimage")); + handle_cache(path, hash, rerere_path(id, "preimage")); fprintf_ln(stderr, _("Updated preimage for '%s'"), path); /*