cat-file: provide %(deltabase) batch format
It can be useful for debugging or analysis to see which objects are stored as deltas on top of other objects, and which objects serve as their delta bases. This information is available by running `git verify-pack`, but that is extremely expensive (and is harder than necessary to parse). Instead, let's make it available as a cat-file query format, which makes it fast and simple to get the bases for a subset of the objects. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com> maint
							parent
							
								
									5d642e7506
								
							
						
					
					
						commit
						65ea9c3c3d
					
				|  | @ -109,6 +109,11 @@ newline. The available atoms are: | ||||||
| 	The size, in bytes, that the object takes up on disk. See the | 	The size, in bytes, that the object takes up on disk. See the | ||||||
| 	note about on-disk sizes in the `CAVEATS` section below. | 	note about on-disk sizes in the `CAVEATS` section below. | ||||||
|  |  | ||||||
|  | `deltabase`:: | ||||||
|  | 	If the object is stored as a delta on-disk, this expands to the | ||||||
|  | 	40-hex sha1 of the delta base object. Otherwise, expands to the | ||||||
|  | 	null sha1 (40 zeroes). See `CAVEATS` below. | ||||||
|  |  | ||||||
| `rest`:: | `rest`:: | ||||||
| 	If this atom is used in the output string, input lines are split | 	If this atom is used in the output string, input lines are split | ||||||
| 	at the first whitespace boundary. All characters before that | 	at the first whitespace boundary. All characters before that | ||||||
|  | @ -152,10 +157,11 @@ should be taken in drawing conclusions about which refs or objects are | ||||||
| responsible for disk usage. The size of a packed non-delta object may be | responsible for disk usage. The size of a packed non-delta object may be | ||||||
| much larger than the size of objects which delta against it, but the | much larger than the size of objects which delta against it, but the | ||||||
| choice of which object is the base and which is the delta is arbitrary | choice of which object is the base and which is the delta is arbitrary | ||||||
| and is subject to change during a repack. Note also that multiple copies | and is subject to change during a repack. | ||||||
| of an object may be present in the object database; in this case, it is |  | ||||||
| undefined which copy's size will be reported. |  | ||||||
|  |  | ||||||
|  | Note also that multiple copies of an object may be present in the object | ||||||
|  | database; in this case, it is undefined which copy's size or delta base | ||||||
|  | will be reported. | ||||||
|  |  | ||||||
| GIT | GIT | ||||||
| --- | --- | ||||||
|  |  | ||||||
|  | @ -118,6 +118,7 @@ struct expand_data { | ||||||
| 	unsigned long size; | 	unsigned long size; | ||||||
| 	unsigned long disk_size; | 	unsigned long disk_size; | ||||||
| 	const char *rest; | 	const char *rest; | ||||||
|  | 	unsigned char delta_base_sha1[20]; | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * If mark_query is true, we do not expand anything, but rather | 	 * If mark_query is true, we do not expand anything, but rather | ||||||
|  | @ -174,6 +175,11 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len, | ||||||
| 			data->split_on_whitespace = 1; | 			data->split_on_whitespace = 1; | ||||||
| 		else if (data->rest) | 		else if (data->rest) | ||||||
| 			strbuf_addstr(sb, data->rest); | 			strbuf_addstr(sb, data->rest); | ||||||
|  | 	} else if (is_atom("deltabase", atom, len)) { | ||||||
|  | 		if (data->mark_query) | ||||||
|  | 			data->info.delta_base_sha1 = data->delta_base_sha1; | ||||||
|  | 		else | ||||||
|  | 			strbuf_addstr(sb, sha1_to_hex(data->delta_base_sha1)); | ||||||
| 	} else | 	} else | ||||||
| 		die("unknown format element: %.*s", len, atom); | 		die("unknown format element: %.*s", len, atom); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -240,4 +240,38 @@ test_expect_success "--batch-check with multiple sha1s gives correct format" ' | ||||||
|     "$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)" |     "$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)" | ||||||
| ' | ' | ||||||
|  |  | ||||||
|  | test_expect_success 'setup blobs which are likely to delta' ' | ||||||
|  | 	test-genrandom foo 10240 >foo && | ||||||
|  | 	{ cat foo; echo plus; } >foo-plus && | ||||||
|  | 	git add foo foo-plus && | ||||||
|  | 	git commit -m foo && | ||||||
|  | 	cat >blobs <<-\EOF | ||||||
|  | 	HEAD:foo | ||||||
|  | 	HEAD:foo-plus | ||||||
|  | 	EOF | ||||||
|  | ' | ||||||
|  |  | ||||||
|  | test_expect_success 'confirm that neither loose blob is a delta' ' | ||||||
|  | 	cat >expect <<-EOF | ||||||
|  | 	$_z40 | ||||||
|  | 	$_z40 | ||||||
|  | 	EOF | ||||||
|  | 	git cat-file --batch-check="%(deltabase)" <blobs >actual && | ||||||
|  | 	test_cmp expect actual | ||||||
|  | ' | ||||||
|  |  | ||||||
|  | # To avoid relying too much on the current delta heuristics, | ||||||
|  | # we will check only that one of the two objects is a delta | ||||||
|  | # against the other, but not the order. We can do so by just | ||||||
|  | # asking for the base of both, and checking whether either | ||||||
|  | # sha1 appears in the output. | ||||||
|  | test_expect_success '%(deltabase) reports packed delta bases' ' | ||||||
|  | 	git repack -ad && | ||||||
|  | 	git cat-file --batch-check="%(deltabase)" <blobs >actual && | ||||||
|  | 	{ | ||||||
|  | 		grep "$(git rev-parse HEAD:foo)" actual || | ||||||
|  | 		grep "$(git rev-parse HEAD:foo-plus)" actual | ||||||
|  | 	} | ||||||
|  | ' | ||||||
|  |  | ||||||
| test_done | test_done | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 Jeff King
						Jeff King