Browse Source
* rc/histogram-diff: xdiff/xhistogram: drop need for additional variable; xdiff/xhistogram: rely on xdl_trim_ends(); xdiff/xhistogram: rework handling of recursed results; xdiff: do away with xdl_mmfile_next(); Make test number unique; xdiff/xprepare: use a smaller sample size for histogram diff; xdiff/xprepare: skip classification; teach --histogram to diff; t4033-diff-patience: factor out tests; xdiff/xpatience: factor out fall-back-diff function; xdiff/xprepare: refactor abort cleanups; xdiff/xprepare: use memset(). Conflicts: xdiff/xprepare.c maint
Junio C Hamano
14 years ago
14 changed files with 655 additions and 285 deletions
@ -0,0 +1,165 @@
@@ -0,0 +1,165 @@
|
||||
#!/bin/sh |
||||
|
||||
# test_diff_frobnitz STRATEGY
#
# Writes two versions of a small C program (file1, file2) plus the diff that
# is expected between them, then registers two tests:
#   1. "git diff --no-index --STRATEGY file1 file2" must produce exactly the
#      expected diff (stored in "expect"),
#   2. the produced diff must be accepted by "git apply".
# $1 is the diff option name without the leading "--" (e.g. "histogram").
test_diff_frobnitz() { |
||||
# Old version of the sample program.
cat >file1 <<\EOF |
||||
#include <stdio.h> |
||||
|
||||
// Frobs foo heartily |
||||
int frobnitz(int foo) |
||||
{ |
||||
int i; |
||||
for(i = 0; i < 10; i++) |
||||
{ |
||||
printf("Your answer is: "); |
||||
printf("%d\n", foo); |
||||
} |
||||
} |
||||
|
||||
int fact(int n) |
||||
{ |
||||
if(n > 1) |
||||
{ |
||||
return fact(n-1) * n; |
||||
} |
||||
return 1; |
||||
} |
||||
|
||||
int main(int argc, char **argv) |
||||
{ |
||||
frobnitz(fact(10)); |
||||
} |
||||
EOF |
||||
|
||||
# New version: fib() is added before frobnitz(), fact() is removed and one
# printf() line is dropped.
cat >file2 <<\EOF |
||||
#include <stdio.h> |
||||
|
||||
int fib(int n) |
||||
{ |
||||
if(n > 2) |
||||
{ |
||||
return fib(n-1) + fib(n-2); |
||||
} |
||||
return 1; |
||||
} |
||||
|
||||
// Frobs foo heartily |
||||
int frobnitz(int foo) |
||||
{ |
||||
int i; |
||||
for(i = 0; i < 10; i++) |
||||
{ |
||||
printf("%d\n", foo); |
||||
} |
||||
} |
||||
|
||||
int main(int argc, char **argv) |
||||
{ |
||||
frobnitz(fib(10)); |
||||
} |
||||
EOF |
||||
|
||||
# The exact diff the strategy under test is expected to emit.
cat >expect <<\EOF |
||||
diff --git a/file1 b/file2 |
||||
index 6faa5a3..e3af329 100644 |
||||
--- a/file1 |
||||
+++ b/file2 |
||||
@@ -1,26 +1,25 @@ |
||||
#include <stdio.h> |
||||
|
||||
+int fib(int n) |
||||
+{ |
||||
+ if(n > 2) |
||||
+ { |
||||
+ return fib(n-1) + fib(n-2); |
||||
+ } |
||||
+ return 1; |
||||
+} |
||||
+ |
||||
// Frobs foo heartily |
||||
int frobnitz(int foo) |
||||
{ |
||||
int i; |
||||
for(i = 0; i < 10; i++) |
||||
{ |
||||
- printf("Your answer is: "); |
||||
printf("%d\n", foo); |
||||
} |
||||
} |
||||
|
||||
-int fact(int n) |
||||
-{ |
||||
- if(n > 1) |
||||
- { |
||||
- return fact(n-1) * n; |
||||
- } |
||||
- return 1; |
||||
-} |
||||
- |
||||
int main(int argc, char **argv) |
||||
{ |
||||
- frobnitz(fact(10)); |
||||
+ frobnitz(fib(10)); |
||||
} |
||||
EOF |
||||
|
||||
# NOTE: STRATEGY is not declared local, so it stays set after the function
# returns; the test bodies below read it when they are evaluated.
STRATEGY=$1 |
||||
|
||||
# The diff command is wrapped in test_must_fail, i.e. a non-zero exit status
# is expected here because the two files differ.
test_expect_success "$STRATEGY diff" ' |
||||
test_must_fail git diff --no-index "--$STRATEGY" file1 file2 > output && |
||||
test_cmp expect output |
||||
' |
||||
|
||||
# Re-uses the name "expect": file2 is moved there, the generated patch is
# applied, and the resulting file2 is compared with the saved copy.
test_expect_success "$STRATEGY diff output is valid" ' |
||||
mv file2 expect && |
||||
git apply < output && |
||||
test_cmp expect file2 |
||||
' |
||||
} |
||||
|
||||
# test_diff_unique STRATEGY
#
# Registers one test that diffs two six-line files which have no line in
# common (uniq1: 1..6, uniq2: a..f) and checks that the output is a single
# hunk removing every old line and adding every new one.
# $1 is the diff option name without the leading "--" (e.g. "histogram").
test_diff_unique() { |
||||
cat >uniq1 <<\EOF |
||||
1 |
||||
2 |
||||
3 |
||||
4 |
||||
5 |
||||
6 |
||||
EOF |
||||
|
||||
cat >uniq2 <<\EOF |
||||
a |
||||
b |
||||
c |
||||
d |
||||
e |
||||
f |
||||
EOF |
||||
|
||||
# Expected output: one hunk replacing all six lines.
cat >expect <<\EOF |
||||
diff --git a/uniq1 b/uniq2 |
||||
index b414108..0fdf397 100644 |
||||
--- a/uniq1 |
||||
+++ b/uniq2 |
||||
@@ -1,6 +1,6 @@ |
||||
-1 |
||||
-2 |
||||
-3 |
||||
-4 |
||||
-5 |
||||
-6 |
||||
+a |
||||
+b |
||||
+c |
||||
+d |
||||
+e |
||||
+f |
||||
EOF |
||||
|
||||
# NOTE: STRATEGY is not declared local, so it stays set after the function
# returns; the test body below reads it when it is evaluated.
STRATEGY=$1 |
||||
|
||||
# Unlike the tests in test_diff_frobnitz, the test title here does not include
# the strategy name.
test_expect_success 'completely different files' ' |
||||
test_must_fail git diff --no-index "--$STRATEGY" uniq1 uniq2 > output && |
||||
test_cmp expect output |
||||
' |
||||
} |
||||
|
@ -0,0 +1,12 @@
@@ -0,0 +1,12 @@
|
||||
#!/bin/sh |
||||
|
||||
# Test script for the --histogram diff option: it only wires the shared
# helper functions up with the strategy name "histogram".
test_description='histogram diff algorithm' |
||||
|
||||
# Load the test harness, then the shared diff test helpers.
# (test_diff_frobnitz and test_diff_unique are presumably defined in
# lib-diff-alternative.sh - verify against that file.)
. ./test-lib.sh |
||||
. "$TEST_DIRECTORY"/lib-diff-alternative.sh |
||||
|
||||
test_diff_frobnitz "histogram" |
||||
|
||||
test_diff_unique "histogram" |
||||
|
||||
test_done |
@ -0,0 +1,363 @@
@@ -0,0 +1,363 @@
|
||||
/* |
||||
* Copyright (C) 2010, Google Inc. |
||||
* and other copyright owners as documented in JGit's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php |
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
#include "xinclude.h" |
||||
#include "xtypes.h" |
||||
#include "xdiff.h" |
||||
|
||||
/*
 * Limits for the unsigned int fields of struct record: MAX_PTR is the bound
 * histogram_diff() checks the last line number of file 1 against, MAX_CNT is
 * the value scanA() caps the per-record occurrence count at.
 */
#define MAX_PTR UINT_MAX |
||||
#define MAX_CNT UINT_MAX |
||||
|
||||
/*
 * Number of the last line of range <n>. LINE_END(n) expects variables named
 * line<n> and count<n> to be in scope at the point of use; LINE_END_PTR(n)
 * is the same computation through pointers and is not used in this file.
 */
#define LINE_END(n) (line##n + count##n - 1) |
||||
#define LINE_END_PTR(n) (*line##n + *count##n - 1) |
||||
|
||||
struct histindex { |
||||
struct record { |
||||
unsigned int ptr, cnt; |
||||
struct record *next; |
||||
} **records, /* an ocurrence */ |
||||
**line_map; /* map of line to record chain */ |
||||
chastore_t rcha; |
||||
unsigned int *next_ptrs; |
||||
unsigned int table_bits, |
||||
records_size, |
||||
line_map_size; |
||||
|
||||
unsigned int max_chain_length, |
||||
key_shift, |
||||
ptr_shift; |
||||
|
||||
unsigned int cnt, |
||||
has_common; |
||||
|
||||
xdfenv_t *env; |
||||
xpparam_t const *xpp; |
||||
}; |
||||
|
||||
/*
 * A matching run of lines: lines begin1..end1 of file 1 correspond to lines
 * begin2..end2 of file 2 (line numbers, both ends inclusive).
 * histogram_diff() treats begin1 == 0 && begin2 == 0 as "nothing found".
 */
struct region {
	unsigned int begin1;
	unsigned int end1;
	unsigned int begin2;
	unsigned int end2;
};
||||
|
||||
/*
 * Accessors for the per-line tables of a struct histindex. Line numbers are
 * translated to array slots by subtracting ptr_shift. All macro parameters
 * are parenthesized so that arguments such as "&idx" or "a ? b : c" expand
 * correctly; LINE_MAP() and NEXT_PTR() remain usable as assignment targets.
 */

/* record that line "a" of file 1 belongs to */
#define LINE_MAP(i, a) ((i)->line_map[(a) - (i)->ptr_shift])

/* next line of file 1 with the same content as "ptr" (0 = none) */
#define NEXT_PTR(index, ptr) \
	((index)->next_ptrs[(ptr) - (index)->ptr_shift])

/* occurrence count of the content found at line "ptr" */
#define CNT(index, ptr) \
	((LINE_MAP(index, ptr))->cnt)

/* entry for line number "l" (counted from 1) of side "s" (1 or 2) */
#define REC(env, s, l) \
	((env)->xdf##s.recs[(l) - 1])
||||
|
||||
/*
 * Tell whether two line entries hold the same content.
 *
 * The stored hashes are compared first; xdl_recmatch() is only consulted
 * when they are equal. Returns 1 on a match and 0 otherwise.
 */
static int cmp_recs(xpparam_t const *xpp,
	xrecord_t *r1, xrecord_t *r2)
{
	if (r1->ha != r2->ha)
		return 0;

	return xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size,
			    xpp->flags) ? 1 : 0;
}
||||
|
||||
/*
 * Line comparison and hashing helpers built on cmp_recs() and REC().
 * Side arguments (s1, s2, side) must be the literal tokens 1 or 2 because
 * REC() pastes them into a member name. Every other parameter is
 * parenthesized and each expansion is wrapped, so the macros are safe with
 * arbitrary argument expressions and inside larger expressions.
 */

/* compare line l1 of side s1 with line l2 of side s2, given xpp and env */
#define CMP_ENV(xpp, env, s1, l1, s2, l2) \
	(cmp_recs((xpp), REC((env), s1, (l1)), REC((env), s2, (l2))))

/* same comparison, taking xpp and env from the histindex "i" */
#define CMP(i, s1, l1, s2, l2) \
	(cmp_recs((i)->xpp, REC((i)->env, s1, (l1)), REC((i)->env, s2, (l2))))

/* bucket number in index->records for the given line */
#define TABLE_HASH(index, side, line) \
	(XDL_HASHLONG((REC((index)->env, side, (line)))->ha, (index)->table_bits))
||||
|
||||
static int scanA(struct histindex *index, int line1, int count1) |
||||
{ |
||||
unsigned int ptr, tbl_idx; |
||||
unsigned int chain_len; |
||||
struct record **rec_chain, *rec; |
||||
|
||||
for (ptr = LINE_END(1); line1 <= ptr; ptr--) { |
||||
tbl_idx = TABLE_HASH(index, 1, ptr); |
||||
rec_chain = index->records + tbl_idx; |
||||
rec = *rec_chain; |
||||
|
||||
chain_len = 0; |
||||
while (rec) { |
||||
if (CMP(index, 1, rec->ptr, 1, ptr)) { |
||||
/* |
||||
* ptr is identical to another element. Insert |
||||
* it onto the front of the existing element |
||||
* chain. |
||||
*/ |
||||
NEXT_PTR(index, ptr) = rec->ptr; |
||||
rec->ptr = ptr; |
||||
/* cap rec->cnt at MAX_CNT */ |
||||
rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1); |
||||
LINE_MAP(index, ptr) = rec; |
||||
goto continue_scan; |
||||
} |
||||
|
||||
rec = rec->next; |
||||
chain_len++; |
||||
} |
||||
|
||||
if (chain_len == index->max_chain_length) |
||||
return -1; |
||||
|
||||
/* |
||||
* This is the first time we have ever seen this particular |
||||
* element in the sequence. Construct a new chain for it. |
||||
*/ |
||||
if (!(rec = xdl_cha_alloc(&index->rcha))) |
||||
return -1; |
||||
rec->ptr = ptr; |
||||
rec->cnt = 1; |
||||
rec->next = *rec_chain; |
||||
*rec_chain = rec; |
||||
LINE_MAP(index, ptr) = rec; |
||||
|
||||
continue_scan: |
||||
; /* no op */ |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr, |
||||
int line1, int count1, int line2, int count2) |
||||
{ |
||||
unsigned int b_next = b_ptr + 1; |
||||
struct record *rec = index->records[TABLE_HASH(index, 2, b_ptr)]; |
||||
unsigned int as, ae, bs, be, np, rc; |
||||
int should_break; |
||||
|
||||
for (; rec; rec = rec->next) { |
||||
if (rec->cnt > index->cnt) { |
||||
if (!index->has_common) |
||||
index->has_common = CMP(index, 1, rec->ptr, 2, b_ptr); |
||||
continue; |
||||
} |
||||
|
||||
as = rec->ptr; |
||||
if (!CMP(index, 1, as, 2, b_ptr)) |
||||
continue; |
||||
|
||||
index->has_common = 1; |
||||
for (;;) { |
||||
should_break = 0; |
||||
np = NEXT_PTR(index, as); |
||||
bs = b_ptr; |
||||
ae = as; |
||||
be = bs; |
||||
rc = rec->cnt; |
||||
|
||||
while (line1 < as && line2 < bs |
||||
&& CMP(index, 1, as - 1, 2, bs - 1)) { |
||||
as--; |
||||
bs--; |
||||
if (1 < rc) |
||||
rc = XDL_MIN(rc, CNT(index, as)); |
||||
} |
||||
while (ae < LINE_END(1) && be < LINE_END(2) |
||||
&& CMP(index, 1, ae + 1, 2, be + 1)) { |
||||
ae++; |
||||
be++; |
||||
if (1 < rc) |
||||
rc = XDL_MIN(rc, CNT(index, ae)); |
||||
} |
||||
|
||||
if (b_next <= be) |
||||
b_next = be + 1; |
||||
if (lcs->end1 - lcs->begin1 < ae - as || rc < index->cnt) { |
||||
lcs->begin1 = as; |
||||
lcs->begin2 = bs; |
||||
lcs->end1 = ae; |
||||
lcs->end2 = be; |
||||
index->cnt = rc; |
||||
} |
||||
|
||||
if (np == 0) |
||||
break; |
||||
|
||||
while (np <= ae) { |
||||
np = NEXT_PTR(index, np); |
||||
if (np == 0) { |
||||
should_break = 1; |
||||
break; |
||||
} |
||||
} |
||||
|
||||
if (should_break) |
||||
break; |
||||
|
||||
as = np; |
||||
} |
||||
} |
||||
return b_next; |
||||
} |
||||
|
||||
static int find_lcs(struct histindex *index, struct region *lcs, |
||||
int line1, int count1, int line2, int count2) { |
||||
int b_ptr; |
||||
|
||||
if (scanA(index, line1, count1)) |
||||
return -1; |
||||
|
||||
index->cnt = index->max_chain_length + 1; |
||||
|
||||
for (b_ptr = line2; b_ptr <= LINE_END(2); ) |
||||
b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2); |
||||
|
||||
return index->has_common && index->max_chain_length < index->cnt; |
||||
} |
||||
|
||||
static int fall_back_to_classic_diff(struct histindex *index, |
||||
int line1, int count1, int line2, int count2) |
||||
{ |
||||
xpparam_t xpp; |
||||
xpp.flags = index->xpp->flags & ~XDF_HISTOGRAM_DIFF; |
||||
|
||||
return xdl_fall_back_diff(index->env, &xpp, |
||||
line1, count1, line2, count2); |
||||
} |
||||
|
||||
static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, |
||||
int line1, int count1, int line2, int count2) |
||||
{ |
||||
struct histindex index; |
||||
struct region lcs; |
||||
int sz; |
||||
int result = -1; |
||||
|
||||
if (count1 <= 0 && count2 <= 0) |
||||
return 0; |
||||
|
||||
if (LINE_END(1) >= MAX_PTR) |
||||
return -1; |
||||
|
||||
if (!count1) { |
||||
while(count2--) |
||||
env->xdf2.rchg[line2++ - 1] = 1; |
||||
return 0; |
||||
} else if (!count2) { |
||||
while(count1--) |
||||
env->xdf1.rchg[line1++ - 1] = 1; |
||||
return 0; |
||||
} |
||||
|
||||
memset(&index, 0, sizeof(index)); |
||||
|
||||
index.env = env; |
||||
index.xpp = xpp; |
||||
|
||||
index.records = NULL; |
||||
index.line_map = NULL; |
||||
/* in case of early xdl_cha_free() */ |
||||
index.rcha.head = NULL; |
||||
|
||||
index.table_bits = xdl_hashbits(count1); |
||||
sz = index.records_size = 1 << index.table_bits; |
||||
sz *= sizeof(struct record *); |
||||
if (!(index.records = (struct record **) xdl_malloc(sz))) |
||||
goto cleanup; |
||||
memset(index.records, 0, sz); |
||||
|
||||
sz = index.line_map_size = count1; |
||||
sz *= sizeof(struct record *); |
||||
if (!(index.line_map = (struct record **) xdl_malloc(sz))) |
||||
goto cleanup; |
||||
memset(index.line_map, 0, sz); |
||||
|
||||
sz = index.line_map_size; |
||||
sz *= sizeof(unsigned int); |
||||
if (!(index.next_ptrs = (unsigned int *) xdl_malloc(sz))) |
||||
goto cleanup; |
||||
memset(index.next_ptrs, 0, sz); |
||||
|
||||
/* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */ |
||||
if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0) |
||||
goto cleanup; |
||||
|
||||
index.ptr_shift = line1; |
||||
index.max_chain_length = 64; |
||||
|
||||
memset(&lcs, 0, sizeof(lcs)); |
||||
if (find_lcs(&index, &lcs, line1, count1, line2, count2)) |
||||
result = fall_back_to_classic_diff(&index, line1, count1, line2, count2); |
||||
else { |
||||
if (lcs.begin1 == 0 && lcs.begin2 == 0) { |
||||
while (count1--) |
||||
env->xdf1.rchg[line1++ - 1] = 1; |
||||
while (count2--) |
||||
env->xdf2.rchg[line2++ - 1] = 1; |
||||
result = 0; |
||||
} else { |
||||
result = histogram_diff(xpp, env, |
||||
line1, lcs.begin1 - line1, |
||||
line2, lcs.begin2 - line2); |
||||
if (result) |
||||
goto cleanup; |
||||
result = histogram_diff(xpp, env, |
||||
lcs.end1 + 1, LINE_END(1) - lcs.end1, |
||||
lcs.end2 + 1, LINE_END(2) - lcs.end2); |
||||
if (result) |
||||
goto cleanup; |
||||
} |
||||
} |
||||
|
||||
cleanup: |
||||
xdl_free(index.records); |
||||
xdl_free(index.line_map); |
||||
xdl_free(index.next_ptrs); |
||||
xdl_cha_free(&index.rcha); |
||||
|
||||
return result; |
||||
} |
||||
|
||||
int xdl_do_histogram_diff(mmfile_t *file1, mmfile_t *file2, |
||||
xpparam_t const *xpp, xdfenv_t *env) |
||||
{ |
||||
if (xdl_prepare_env(file1, file2, xpp, env) < 0) |
||||
return -1; |
||||
|
||||
return histogram_diff(xpp, env, |
||||
env->xdf1.dstart + 1, env->xdf1.dend - env->xdf1.dstart + 1, |
||||
env->xdf2.dstart + 1, env->xdf2.dend - env->xdf2.dstart + 1); |
||||
} |
Loading…
Reference in new issue