xdiff: optimize xdl_hash_record_verbatim
xdl_hash_record_verbatim uses a modified djb2 hash with XOR instead of ADD
for combining. The ADD-based variant is used as the basis of the modern
("GNU") symbol lookup scheme in ELF. The glibc dynamic loader received an
optimized version of this hash function thanks to Noah Goldstein [1].
Switch xdl_hash_record_verbatim to additive hashing and implement
an optimized loop following the scheme suggested by Noah.
Timing 'git log --oneline --shortstat v2.0.0..v2.5.0' under perf, I got:
version | cycles, bn | instructions, bn
---------------------------------------
A       | 6.38       | 11.3
B       | 6.21       | 10.89
C       | 5.80       | 9.95
D       | 5.83       | 8.74
---------------------------------------
A: baseline (git master at e4ef0485fd)
B: plus 'xdiff: refactor xdl_hash_record()'
C: and plus this patch
D: with 'xdiff: use xxhash' by Phillip Wood
The resulting speedup for xdl_hash_record_verbatim itself is about 1.5x.
[1] https://inbox.sourceware.org/libc-alpha/20220519221803.57957-6-goldstein.w.n@gmail.com/
Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
main
parent
41d97837ab
commit
a4bbe8af0b
|
@ -294,16 +294,67 @@ unsigned long xdl_hash_record_with_whitespace(char const **data,
|
|||
return ha;
|
||||
}
|
||||
|
||||
unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
|
||||
unsigned long ha = 5381;
|
||||
char const *ptr = *data;
|
||||
/*
 * Compiler reassociation barrier: pretend to modify X and Y to disallow
 * changing evaluation order with respect to following uses of X and Y.
 *
 * The asm template is empty, so no instructions are emitted for it; the
 * "+r" constraints only tell the compiler that both operands are read and
 * written here. Without GNU-style inline asm the macro expands to nothing,
 * which leaves the computed value unchanged and merely gives up the
 * evaluation-order hint.
 */
#ifdef __GNUC__
#define REASSOC_FENCE(x, y) __asm__("" : "+r"(x), "+r"(y))
#else
#define REASSOC_FENCE(x, y)
#endif

/*
 * Hash the bytes of one record (line) without any whitespace handling.
 *
 * Hashing starts at *data and stops at the first '\n' or at TOP, whichever
 * comes first. The newline itself is not mixed into the hash. On return,
 * *data points just past the newline when one was found, or equals TOP
 * otherwise.
 *
 * The hash is additive djb2: it starts at 5381 and for every byte C does
 * HA = HA * 33 + C, with each byte converted from plain char to
 * unsigned long. Arithmetic is unsigned and therefore wraps.
 */
unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
	unsigned long ha = 5381, c0, c1;
	char const *ptr = *data;
#if 0
	/*
	 * The baseline form of the optimized loop below.  This is the djb2
	 * hash (the above function uses a variant with XOR instead of ADD).
	 */
	for (; ptr < top && *ptr != '\n'; ptr++) {
		ha += (ha << 5);
		ha += (unsigned long) *ptr;
	}
	*data = ptr < top ? ptr + 1: ptr;
#else
	/* Process two characters per iteration. */
	if (top - ptr >= 2) do {
		if ((c0 = ptr[0]) == '\n') {
			/* Record ends on an even offset: nothing left to mix in. */
			*data = ptr + 1;
			return ha;
		}
		if ((c1 = ptr[1]) == '\n') {
			/*
			 * Record ends after C0: fold in just that one byte.
			 * HA * 32 + (C0 + HA) is HA * 33 + C0.
			 */
			*data = ptr + 2;
			c0 += ha;
			REASSOC_FENCE(c0, ha);
			ha = ha * 32 + c0;
			return ha;
		}
		/*
		 * Combine characters C0 and C1 into the hash HA. We have
		 * HA = (HA * 33 + C0) * 33 + C1, and we want to ensure
		 * that dependency chain over HA is just one multiplication
		 * and one addition, i.e. we want to evaluate this as
		 * HA = HA * 33 * 33 + (C0 * 33 + C1), and likewise prefer
		 * (C0 * 32 + (C0 + C1)) for the expression in parenthesis.
		 */
		ha *= 33 * 33;
		c1 += c0;
		REASSOC_FENCE(c1, c0);
		c1 += c0 * 32;
		REASSOC_FENCE(c1, ha);
		ha += c1;

		ptr += 2;
	} while (ptr < top - 1);

	/*
	 * At most one byte remains before TOP. Whether it is a newline or
	 * the last byte of an unterminated record, the next record starts
	 * at TOP.
	 */
	*data = top;
	if (ptr < top && (c0 = ptr[0]) != '\n') {
		/* Same single-byte step as above: HA = HA * 33 + C0. */
		c0 += ha;
		REASSOC_FENCE(c0, ha);
		ha = ha * 32 + c0;
	}
#endif
	return ha;
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue