xdiff: delete chastore from xdfile_t

xdfile_t currently uses chastore_t, which is an arena allocator. I
think that xrecord_t used to be a linked list and recs didn't exist
originally. When recs was added, I think removing xdfile_t.next was
overlooked. This dual data structure setup makes the code somewhat
confusing.

Additionally the C type chastore_t isn't FFI friendly, and provides
little to no performance benefit over using realloc to grow an array.

Performance impact of deleting fields from xdfile_t:
Deleting ha makes it about 5% slower.
Deleting cha makes it about 5% faster.
Deleting both ha and cha is roughly neutral (about 1% slower).

Delete ha, but keep cha
  time hyperfine --warmup 3 -L exe build_v2.51.0/git,build_delete_ha/git '{exe} log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null'
  Benchmark 1: build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
    Time (mean ± σ):      1.269 s ±  0.017 s    [User: 1.135 s, System: 0.128 s]
    Range (min … max):    1.249 s …  1.286 s    10 runs

  Benchmark 2: build_delete_ha/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
    Time (mean ± σ):      1.339 s ±  0.017 s    [User: 1.234 s, System: 0.099 s]
    Range (min … max):    1.320 s …  1.358 s    10 runs

  Summary
    build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null ran
      1.06 ± 0.02 times faster than build_delete_ha/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null

Delete cha, but keep ha
  time hyperfine --warmup 3 -L exe build_v2.51.0/git,build_delete_chastore/git '{exe} log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null'
  Benchmark 1: build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
    Time (mean ± σ):      1.290 s ±  0.001 s    [User: 1.154 s, System: 0.130 s]
    Range (min … max):    1.288 s …  1.292 s    10 runs

  Benchmark 2: build_delete_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
    Time (mean ± σ):      1.232 s ±  0.017 s    [User: 1.105 s, System: 0.121 s]
    Range (min … max):    1.205 s …  1.249 s    10 runs

  Summary
    build_delete_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null ran
      1.05 ± 0.01 times faster than build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null

Delete ha AND chastore
  time hyperfine --warmup 3 -L exe build_v2.51.0/git,build_delete_ha_and_chastore/git '{exe} log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null'
  Benchmark 1: build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
    Time (mean ± σ):      1.291 s ±  0.002 s    [User: 1.156 s, System: 0.129 s]
    Range (min … max):    1.287 s …  1.295 s    10 runs

  Benchmark 2: build_delete_ha_and_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null
    Time (mean ± σ):      1.306 s ±  0.001 s    [User: 1.195 s, System: 0.105 s]
    Range (min … max):    1.305 s …  1.308 s    10 runs

  Summary
    build_v2.51.0/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null ran
      1.01 ± 0.00 times faster than build_delete_ha_and_chastore/git log --oneline --shortstat --diff-algorithm=myers -3000 v2.39.1 >/dev/null

Best-viewed-with: --color-words
Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
main
Ezekiel Newren 2025-09-26 22:41:56 +00:00 committed by Junio C Hamano
parent 6d507bd41a
commit d43d591252
8 changed files with 63 additions and 69 deletions

View File

@ -24,7 +24,7 @@

static unsigned long get_hash(xdfile_t *xdf, long index)
{
return xdf->recs[xdf->rindex[index]]->ha;
return xdf->recs[xdf->rindex[index]].ha;
}

#define XDL_MAX_COST_MIN 256
@ -489,13 +489,13 @@ static void measure_split(const xdfile_t *xdf, long split,
m->indent = -1;
} else {
m->end_of_file = 0;
m->indent = get_indent(xdf->recs[split]);
m->indent = get_indent(&xdf->recs[split]);
}

m->pre_blank = 0;
m->pre_indent = -1;
for (i = split - 1; i >= 0; i--) {
m->pre_indent = get_indent(xdf->recs[i]);
m->pre_indent = get_indent(&xdf->recs[i]);
if (m->pre_indent != -1)
break;
m->pre_blank += 1;
@ -508,7 +508,7 @@ static void measure_split(const xdfile_t *xdf, long split,
m->post_blank = 0;
m->post_indent = -1;
for (i = split + 1; i < xdf->nrec; i++) {
m->post_indent = get_indent(xdf->recs[i]);
m->post_indent = get_indent(&xdf->recs[i]);
if (m->post_indent != -1)
break;
m->post_blank += 1;
@ -752,7 +752,7 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g)
static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g)
{
if (g->end < xdf->nrec &&
recs_match(xdf->recs[g->start], xdf->recs[g->end])) {
recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) {
xdf->rchg[g->start++] = 0;
xdf->rchg[g->end++] = 1;

@ -773,7 +773,7 @@ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g)
static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g)
{
if (g->start > 0 &&
recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) {
recs_match(&xdf->recs[g->start - 1], &xdf->recs[g->end - 1])) {
xdf->rchg[--g->start] = 1;
xdf->rchg[--g->end] = 0;

@ -988,16 +988,16 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags)

for (xch = xscr; xch; xch = xch->next) {
int ignore = 1;
xrecord_t **rec;
xrecord_t *rec;
long i;

rec = &xe->xdf1.recs[xch->i1];
for (i = 0; i < xch->chg1 && ignore; i++)
ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags);

rec = &xe->xdf2.recs[xch->i2];
for (i = 0; i < xch->chg2 && ignore; i++)
ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags);

xch->ignore = ignore;
}
@ -1021,7 +1021,7 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
xdchange_t *xch;

for (xch = xscr; xch; xch = xch->next) {
xrecord_t **rec;
xrecord_t *rec;
int ignore = 1;
long i;

@ -1033,11 +1033,11 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,

rec = &xe->xdf1.recs[xch->i1];
for (i = 0; i < xch->chg1 && ignore; i++)
ignore = record_matches_regex(rec[i], xpp);
ignore = record_matches_regex(&rec[i], xpp);

rec = &xe->xdf2.recs[xch->i2];
for (i = 0; i < xch->chg2 && ignore; i++)
ignore = record_matches_regex(rec[i], xpp);
ignore = record_matches_regex(&rec[i], xpp);

xch->ignore = ignore;
}

View File

@ -25,7 +25,7 @@

static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb)
{
xrecord_t *rec = xdf->recs[ri];
xrecord_t *rec = &xdf->recs[ri];

if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0)
return -1;
@ -110,7 +110,7 @@ static long def_ff(const char *rec, long len, char *buf, long sz)
static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri,
char *buf, long sz)
{
xrecord_t *rec = xdf->recs[ri];
xrecord_t *rec = &xdf->recs[ri];

if (!xecfg->find_func)
return def_ff(rec->ptr, rec->size, buf, sz);
@ -150,7 +150,7 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg,

static int is_empty_rec(xdfile_t *xdf, long ri)
{
xrecord_t *rec = xdf->recs[ri];
xrecord_t *rec = &xdf->recs[ri];
long i = 0;

for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++);

View File

@ -86,7 +86,7 @@ struct region {
((LINE_MAP(index, ptr))->cnt)

#define REC(env, s, l) \
(env->xdf##s.recs[l - 1])
(&env->xdf##s.recs[l - 1])

static int cmp_recs(xrecord_t *r1, xrecord_t *r2)
{

View File

@ -97,12 +97,12 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2,
int line_count, long flags)
{
int i;
xrecord_t **rec1 = xe1->xdf2.recs + i1;
xrecord_t **rec2 = xe2->xdf2.recs + i2;
xrecord_t *rec1 = xe1->xdf2.recs + i1;
xrecord_t *rec2 = xe2->xdf2.recs + i2;

for (i = 0; i < line_count; i++) {
int result = xdl_recmatch(rec1[i]->ptr, rec1[i]->size,
rec2[i]->ptr, rec2[i]->size, flags);
int result = xdl_recmatch(rec1[i].ptr, rec1[i].size,
rec2[i].ptr, rec2[i].size, flags);
if (!result)
return -1;
}
@ -111,7 +111,7 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2,

static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest)
{
xrecord_t **recs;
xrecord_t *recs;
int size = 0;

recs = (use_orig ? xe->xdf1.recs : xe->xdf2.recs) + i;
@ -119,12 +119,12 @@ static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int nee
if (count < 1)
return 0;

for (i = 0; i < count; size += recs[i++]->size)
for (i = 0; i < count; size += recs[i++].size)
if (dest)
memcpy(dest + size, recs[i]->ptr, recs[i]->size);
memcpy(dest + size, recs[i].ptr, recs[i].size);
if (add_nl) {
i = recs[count - 1]->size;
if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') {
i = recs[count - 1].size;
if (i == 0 || recs[count - 1].ptr[i - 1] != '\n') {
if (needs_cr) {
if (dest)
dest[size] = '\r';
@ -160,22 +160,22 @@ static int is_eol_crlf(xdfile_t *file, int i)

if (i < file->nrec - 1)
/* All lines before the last *must* end in LF */
return (size = file->recs[i]->size) > 1 &&
file->recs[i]->ptr[size - 2] == '\r';
return (size = file->recs[i].size) > 1 &&
file->recs[i].ptr[size - 2] == '\r';
if (!file->nrec)
/* Cannot determine eol style from empty file */
return -1;
if ((size = file->recs[i]->size) &&
file->recs[i]->ptr[size - 1] == '\n')
if ((size = file->recs[i].size) &&
file->recs[i].ptr[size - 1] == '\n')
/* Last line; ends in LF; Is it CR/LF? */
return size > 1 &&
file->recs[i]->ptr[size - 2] == '\r';
file->recs[i].ptr[size - 2] == '\r';
if (!i)
/* The only line has no eol */
return -1;
/* Determine eol from second-to-last line */
return (size = file->recs[i - 1]->size) > 1 &&
file->recs[i - 1]->ptr[size - 2] == '\r';
return (size = file->recs[i - 1].size) > 1 &&
file->recs[i - 1].ptr[size - 2] == '\r';
}

static int is_cr_needed(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m)
@ -334,22 +334,22 @@ static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags)
static void xdl_refine_zdiff3_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
xpparam_t const *xpp)
{
xrecord_t **rec1 = xe1->xdf2.recs, **rec2 = xe2->xdf2.recs;
xrecord_t *rec1 = xe1->xdf2.recs, *rec2 = xe2->xdf2.recs;
for (; m; m = m->next) {
/* let's handle just the conflicts */
if (m->mode)
continue;

while(m->chg1 && m->chg2 &&
recmatch(rec1[m->i1], rec2[m->i2], xpp->flags)) {
recmatch(&rec1[m->i1], &rec2[m->i2], xpp->flags)) {
m->chg1--;
m->chg2--;
m->i1++;
m->i2++;
}
while (m->chg1 && m->chg2 &&
recmatch(rec1[m->i1 + m->chg1 - 1],
rec2[m->i2 + m->chg2 - 1], xpp->flags)) {
recmatch(&rec1[m->i1 + m->chg1 - 1],
&rec2[m->i2 + m->chg2 - 1], xpp->flags)) {
m->chg1--;
m->chg2--;
}
@ -381,12 +381,12 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
* This probably does not work outside git, since
* we have a very simple mmfile structure.
*/
t1.ptr = (char *)xe1->xdf2.recs[m->i1]->ptr;
t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1]->ptr
+ xe1->xdf2.recs[m->i1 + m->chg1 - 1]->size - t1.ptr;
t2.ptr = (char *)xe2->xdf2.recs[m->i2]->ptr;
t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1]->ptr
+ xe2->xdf2.recs[m->i2 + m->chg2 - 1]->size - t2.ptr;
t1.ptr = (char *)xe1->xdf2.recs[m->i1].ptr;
t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr
+ xe1->xdf2.recs[m->i1 + m->chg1 - 1].size - t1.ptr;
t2.ptr = (char *)xe2->xdf2.recs[m->i2].ptr;
t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr
+ xe2->xdf2.recs[m->i2 + m->chg2 - 1].size - t2.ptr;
if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0)
return -1;
if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
@ -440,8 +440,8 @@ static int line_contains_alnum(const char *ptr, long size)
static int lines_contain_alnum(xdfenv_t *xe, int i, int chg)
{
for (; chg; chg--, i++)
if (line_contains_alnum(xe->xdf2.recs[i]->ptr,
xe->xdf2.recs[i]->size))
if (line_contains_alnum(xe->xdf2.recs[i].ptr,
xe->xdf2.recs[i].size))
return 1;
return 0;
}

View File

@ -88,9 +88,9 @@ static int is_anchor(xpparam_t const *xpp, const char *line)
static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
int pass)
{
xrecord_t **records = pass == 1 ?
xrecord_t *records = pass == 1 ?
map->env->xdf1.recs : map->env->xdf2.recs;
xrecord_t *record = records[line - 1];
xrecord_t *record = &records[line - 1];
/*
* After xdl_prepare_env() (or more precisely, due to
* xdl_classify_record()), the "ha" member of the records (AKA lines)
@ -121,7 +121,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
return;
map->entries[index].line1 = line;
map->entries[index].hash = record->ha;
map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1]->ptr);
map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1].ptr);
if (!map->first)
map->first = map->entries + index;
if (map->last) {
@ -246,8 +246,8 @@ static int find_longest_common_sequence(struct hashmap *map, struct entry **res)

static int match(struct hashmap *map, int line1, int line2)
{
xrecord_t *record1 = map->env->xdf1.recs[line1 - 1];
xrecord_t *record2 = map->env->xdf2.recs[line2 - 1];
xrecord_t *record1 = &map->env->xdf1.recs[line1 - 1];
xrecord_t *record2 = &map->env->xdf2.recs[line2 - 1];
return record1->ha == record2->ha;
}


View File

@ -128,7 +128,6 @@ static void xdl_free_ctx(xdfile_t *xdf)
xdl_free(xdf->rindex);
xdl_free(xdf->rchg - 1);
xdl_free(xdf->recs);
xdl_cha_free(&xdf->rcha);
}


@ -143,8 +142,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
xdf->rchg = NULL;
xdf->recs = NULL;

if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
goto abort;
if (!XDL_ALLOC_ARRAY(xdf->recs, narec))
goto abort;

@ -155,12 +152,10 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
hav = xdl_hash_record(&cur, top, xpp->flags);
if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec))
goto abort;
if (!(crec = xdl_cha_alloc(&xdf->rcha)))
goto abort;
crec = &xdf->recs[xdf->nrec++];
crec->ptr = prev;
crec->size = (long) (cur - prev);
crec->ha = hav;
xdf->recs[xdf->nrec++] = crec;
if (xdl_classify_record(pass, cf, crec) < 0)
goto abort;
}
@ -260,7 +255,7 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
*/
static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
long i, nm, nreff, mlim;
xrecord_t **recs;
xrecord_t *recs;
xdlclass_t *rcrec;
char *dis, *dis1, *dis2;
int need_min = !!(cf->flags & XDF_NEED_MINIMAL);
@ -273,7 +268,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
mlim = XDL_MAX_EQLIMIT;
for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
rcrec = cf->rcrecs[(*recs)->ha];
rcrec = cf->rcrecs[recs->ha];
nm = rcrec ? rcrec->len2 : 0;
dis1[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1;
}
@ -281,7 +276,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
mlim = XDL_MAX_EQLIMIT;
for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
rcrec = cf->rcrecs[(*recs)->ha];
rcrec = cf->rcrecs[recs->ha];
nm = rcrec ? rcrec->len1 : 0;
dis2[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1;
}
@ -317,13 +312,13 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
*/
static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
long i, lim;
xrecord_t **recs1, **recs2;
xrecord_t *recs1, *recs2;

recs1 = xdf1->recs;
recs2 = xdf2->recs;
for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
i++, recs1++, recs2++)
if ((*recs1)->ha != (*recs2)->ha)
if (recs1->ha != recs2->ha)
break;

xdf1->dstart = xdf2->dstart = i;
@ -331,7 +326,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
recs1 = xdf1->recs + xdf1->nrec - 1;
recs2 = xdf2->recs + xdf2->nrec - 1;
for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--)
if ((*recs1)->ha != (*recs2)->ha)
if (recs1->ha != recs2->ha)
break;

xdf1->dend = xdf1->nrec - i - 1;

View File

@ -45,10 +45,9 @@ typedef struct s_xrecord {
} xrecord_t;

typedef struct s_xdfile {
chastore_t rcha;
xrecord_t *recs;
long nrec;
long dstart, dend;
xrecord_t **recs;
char *rchg;
long *rindex;
long nreff;

View File

@ -416,12 +416,12 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
mmfile_t subfile1, subfile2;
xdfenv_t env;

subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1]->ptr;
subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2]->ptr +
diff_env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr;
subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1]->ptr;
subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2]->ptr +
diff_env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr;
subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1].ptr;
subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2].ptr +
diff_env->xdf1.recs[line1 + count1 - 2].size - subfile1.ptr;
subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1].ptr;
subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2].ptr +
diff_env->xdf2.recs[line2 + count2 - 2].size - subfile2.ptr;
if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0)
return -1;