|
|
|
#include "cache.h"
#include "config.h"
#include "object-store.h"
#include "xdiff-interface.h"
#include "xdiff/xtypes.h"
#include "xdiff/xdiffi.h"
#include "xdiff/xemit.h"
#include "xdiff/xmacros.h"
#include "xdiff/xutils.h"
#include <limits.h>
|
|
|
|
|
|
|
|
struct xdiff_emit_state {
|
|
|
|
xdiff_emit_consume_fn consume;
|
|
|
|
void *consume_callback_data;
|
|
|
|
struct strbuf remainder;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Parse a run of decimal digits starting at *cp_p.
 *
 * On success the value is stored in *num_p, *cp_p is advanced past the
 * digits and 0 is returned.  -1 is returned, with *cp_p and *num_p left
 * untouched, when there is no digit at *cp_p or when the value would
 * not fit in an int.
 */
static int parse_num(char **cp_p, int *num_p)
{
	char *cp = *cp_p;
	int num = 0;

	while ('0' <= *cp && *cp <= '9') {
		int digit = *cp++ - '0';

		/* signed overflow is undefined; treat it as a parse error */
		if (num > (INT_MAX - digit) / 10)
			return -1;
		num = num * 10 + digit;
	}
	if (cp == *cp_p)
		return -1;
	*cp_p = cp;
	*num_p = num;
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Parse a hunk header of the form
 *
 *	"@@ -<ob>[,<on>] +<nb>[,<nn>] @@"
 *
 * The first four bytes of `line` are skipped without being examined, so
 * the caller is expected to have verified the leading "@@ -" already.
 * A count that is omitted (no ",<n>") is reported as 1.  `len` is not
 * used by the parser.
 *
 * Returns 0 when the numbers are followed by " @@" and -1 when they are
 * not; for any other malformation the result of
 * error("malformed diff output: ...") is returned.
 */
int parse_hunk_header(char *line, int len,
		      int *ob, int *on,
		      int *nb, int *nn)
{
	char *cp = line + 4;

	if (parse_num(&cp, ob))
		goto bad_line;
	if (*cp == ',') {
		cp++;
		if (parse_num(&cp, on))
			goto bad_line;
	} else {
		*on = 1;
	}
	if (*cp++ != ' ' || *cp++ != '+')
		goto bad_line;
	if (parse_num(&cp, nb))
		goto bad_line;
	if (*cp == ',') {
		cp++;
		if (parse_num(&cp, nn))
			goto bad_line;
	} else {
		*nn = 1;
	}
	/* strncmp stops at a NUL, so a truncated header is never over-read */
	return -!!strncmp(cp, " @@", 3);

bad_line:
	return error("malformed diff output: %s", line);
}
|
|
|
|
|
|
|
|
static void consume_one(void *priv_, char *s, unsigned long size)
|
|
|
|
{
|
|
|
|
struct xdiff_emit_state *priv = priv_;
|
|
|
|
char *ep;
|
|
|
|
while (size) {
|
|
|
|
unsigned long this_size;
|
|
|
|
ep = memchr(s, '\n', size);
|
|
|
|
this_size = (ep == NULL) ? size : (ep - s + 1);
|
|
|
|
priv->consume(priv->consume_callback_data, s, this_size);
|
|
|
|
size -= this_size;
|
|
|
|
s += this_size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Make xdi_diff_outf interface for running xdiff_outf diffs
To prepare for the need to initialize and release resources for an
xdi_diff with the xdiff_outf output function, make a new function to
wrap this usage.
Old:
ecb.outf = xdiff_outf;
ecb.priv = &state;
...
xdi_diff(file_p, file_o, &xpp, &xecfg, &ecb);
New:
xdi_diff_outf(file_p, file_o, &state.xm, &xpp, &xecfg, &ecb);
Signed-off-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
/*
 * Output callback installed in xdemitcb_t.outf by xdi_diff_outf().
 *
 * Receives `nbuf` buffers in `mb`.  A buffer that does not end in '\n'
 * is appended to priv->remainder; a buffer that does end in '\n' is
 * passed on through consume_one(), prefixed by whatever had accumulated
 * in the remainder.  Anything still left in the remainder after the
 * last buffer is flushed as well.  Always returns 0.
 */
static int xdiff_outf(void *priv_, mmbuffer_t *mb, int nbuf)
{
	struct xdiff_emit_state *priv = priv_;
	int i;

	for (i = 0; i < nbuf; i++) {
		/* an empty buffer has no last byte to look at */
		if (!mb[i].size)
			continue;

		if (mb[i].ptr[mb[i].size-1] != '\n') {
			/* Incomplete line */
			strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size);
			continue;
		}

		/* we have a complete line */
		if (!priv->remainder.len) {
			consume_one(priv, mb[i].ptr, mb[i].size);
			continue;
		}
		strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size);
		consume_one(priv, priv->remainder.buf, priv->remainder.len);
		strbuf_reset(&priv->remainder);
	}
	if (priv->remainder.len) {
		consume_one(priv, priv->remainder.buf, priv->remainder.len);
		strbuf_reset(&priv->remainder);
	}
	return 0;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Trim down common substring at the end of the buffers,
|
|
|
|
* but end on a complete line.
|
|
|
|
*/
|
|
|
|
static void trim_common_tail(mmfile_t *a, mmfile_t *b)
|
|
|
|
{
|
|
|
|
const int blk = 1024;
|
|
|
|
long trimmed = 0, recovered = 0;
|
|
|
|
char *ap = a->ptr + a->size;
|
|
|
|
char *bp = b->ptr + b->size;
|
|
|
|
long smaller = (a->size < b->size) ? a->size : b->size;
|
|
|
|
|
|
|
|
while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) {
|
|
|
|
trimmed += blk;
|
|
|
|
ap -= blk;
|
|
|
|
bp -= blk;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (recovered < trimmed)
|
|
|
|
if (ap[recovered++] == '\n')
|
|
|
|
break;
|
|
|
|
a->size -= trimmed - recovered;
|
|
|
|
b->size -= trimmed - recovered;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Run xdl_diff() on `mf1` and `mf2`.
 *
 * The inputs are copied into local mmfile_t values first, so the
 * caller's structures are not modified when the common tail is trimmed.
 * Returns -1 without diffing when either input is larger than
 * MAX_XDIFF_SIZE; otherwise returns whatever xdl_diff() returns.
 */
int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t const *xecfg, xdemitcb_t *xecb)
{
	mmfile_t a = *mf1;
	mmfile_t b = *mf2;

	/*
	 * The xdiff code uses "int" in many places and is not prepared to
	 * handle extremely large files; reject them here rather than risk
	 * an overflow that yields bogus output or an undersized buffer.
	 */
	if (mf1->size > MAX_XDIFF_SIZE || mf2->size > MAX_XDIFF_SIZE)
		return -1;

	/* the tail is only dropped when no context of any kind is wanted */
	if (!xecfg->ctxlen && !(xecfg->flags & XDL_EMIT_FUNCCONTEXT))
		trim_common_tail(&a, &b);

	return xdl_diff(&a, &b, xpp, xecfg, xecb);
}
|
|
|
|
|
Make xdi_diff_outf interface for running xdiff_outf diffs
To prepare for the need to initialize and release resources for an
xdi_diff with the xdiff_outf output function, make a new function to
wrap this usage.
Old:
ecb.outf = xdiff_outf;
ecb.priv = &state;
...
xdi_diff(file_p, file_o, &xpp, &xecfg, &ecb);
New:
xdi_diff_outf(file_p, file_o, &state.xm, &xpp, &xecfg, &ecb);
Signed-off-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2,
|
|
|
|
xdiff_emit_consume_fn fn, void *consume_callback_data,
|
|
|
|
xpparam_t const *xpp, xdemitconf_t const *xecfg)
|
Make xdi_diff_outf interface for running xdiff_outf diffs
To prepare for the need to initialize and release resources for an
xdi_diff with the xdiff_outf output function, make a new function to
wrap this usage.
Old:
ecb.outf = xdiff_outf;
ecb.priv = &state;
...
xdi_diff(file_p, file_o, &xpp, &xecfg, &ecb);
New:
xdi_diff_outf(file_p, file_o, &state.xm, &xpp, &xecfg, &ecb);
Signed-off-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
struct xdiff_emit_state state;
|
|
|
|
xdemitcb_t ecb;
|
|
|
|
|
|
|
|
memset(&state, 0, sizeof(state));
|
|
|
|
state.consume = fn;
|
|
|
|
state.consume_callback_data = consume_callback_data;
|
|
|
|
memset(&ecb, 0, sizeof(ecb));
|
|
|
|
ecb.outf = xdiff_outf;
|
|
|
|
ecb.priv = &state;
|
|
|
|
strbuf_init(&state.remainder, 0);
|
|
|
|
ret = xdi_diff(mf1, mf2, xpp, xecfg, &ecb);
|
|
|
|
strbuf_release(&state.remainder);
|
Make xdi_diff_outf interface for running xdiff_outf diffs
To prepare for the need to initialize and release resources for an
xdi_diff with the xdiff_outf output function, make a new function to
wrap this usage.
Old:
ecb.outf = xdiff_outf;
ecb.priv = &state;
...
xdi_diff(file_p, file_o, &xpp, &xecfg, &ecb);
New:
xdi_diff_outf(file_p, file_o, &state.xm, &xpp, &xecfg, &ecb);
Signed-off-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
17 years ago
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Read the whole of `filename` into a freshly allocated buffer.
 *
 * On success ptr->ptr points at the data, ptr->size holds its length
 * and 0 is returned; nothing in this function frees the buffer, so it
 * is left to the caller.  An empty file still gets a one-byte
 * allocation.  On failure the result of error()/error_errno() is
 * returned; if the read itself failed, the buffer is released and
 * ptr->ptr is reset to NULL.
 */
int read_mmfile(mmfile_t *ptr, const char *filename)
{
	struct stat st;
	FILE *f;
	size_t sz;

	if (stat(filename, &st))
		return error_errno("Could not stat %s", filename);
	if ((f = fopen(filename, "rb")) == NULL)
		return error_errno("Could not open %s", filename);
	sz = xsize_t(st.st_size);
	ptr->ptr = xmalloc(sz ? sz : 1);
	if (sz && fread(ptr->ptr, sz, 1, f) != 1) {
		/* do not leak (or hand back) a partially filled buffer */
		free(ptr->ptr);
		ptr->ptr = NULL;
		ptr->size = 0;
		fclose(f);
		return error("Could not read %s", filename);
	}
	fclose(f);
	ptr->size = sz;
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Load the blob named by `oid` into `ptr`.
 *
 * The null object id yields an empty, separately allocated string with
 * size 0.  For any other id the object is read with read_object_file();
 * the function die()s when that fails or when the object is not a blob.
 */
void read_mmblob(mmfile_t *ptr, const struct object_id *oid)
{
	unsigned long size;
	enum object_type type;

	if (oideq(oid, &null_oid)) {
		ptr->ptr = xstrdup("");
		ptr->size = 0;
		return;
	}

	ptr->ptr = read_object_file(oid, &type, &size);
	if (!ptr->ptr || type != OBJ_BLOB)
		die("unable to read blob object %s", oid_to_hex(oid));
	ptr->size = size;
}
|
|
|
|
|
|
|
|
/* number of leading bytes inspected by buffer_is_binary() */
#define FIRST_FEW_BYTES 8000

/*
 * Report whether a buffer looks binary: returns 1 when a NUL byte
 * occurs within the first FIRST_FEW_BYTES bytes (or within the whole
 * buffer if it is shorter), 0 otherwise.  An empty buffer is never
 * binary, and `ptr` is not examined in that case.
 */
int buffer_is_binary(const char *ptr, unsigned long size)
{
	/* keep a possibly-NULL pointer of an empty buffer away from memchr */
	if (!size)
		return 0;
	if (FIRST_FEW_BYTES < size)
		size = FIRST_FEW_BYTES;
	return !!memchr(ptr, 0, size);
}
|
|
|
|
|
|
|
|
/*
 * Set of compiled patterns used by ff_regexp(); built by
 * xdiff_set_find_func() and torn down by xdiff_clear_find_func().
 */
struct ff_regs {
	/* number of entries in array */
	int nr;
	struct ff_reg {
		/* compiled pattern */
		regex_t re;
		/* non-zero when the pattern was written with a leading '!' */
		int negate;
	} *array;
};
|
|
|
|
|
|
|
|
static long ff_regexp(const char *line, long len,
|
|
|
|
char *buffer, long buffer_size, void *priv)
|
|
|
|
{
|
|
|
|
struct ff_regs *regs = priv;
|
|
|
|
regmatch_t pmatch[2];
|
|
|
|
int i;
|
|
|
|
int result;
|
|
|
|
|
|
|
|
/* Exclude terminating newline (and cr) from matching */
|
|
|
|
if (len > 0 && line[len-1] == '\n') {
|
|
|
|
if (len > 1 && line[len-2] == '\r')
|
|
|
|
len -= 2;
|
|
|
|
else
|
|
|
|
len--;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < regs->nr; i++) {
|
|
|
|
struct ff_reg *reg = regs->array + i;
|
|
|
|
if (!regexec_buf(®->re, line, len, 2, pmatch, 0)) {
|
|
|
|
if (reg->negate)
|
|
|
|
return -1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (regs->nr <= i)
|
|
|
|
return -1;
|
|
|
|
i = pmatch[1].rm_so >= 0 ? 1 : 0;
|
|
|
|
line += pmatch[i].rm_so;
|
|
|
|
result = pmatch[i].rm_eo - pmatch[i].rm_so;
|
|
|
|
if (result > buffer_size)
|
|
|
|
result = buffer_size;
|
|
|
|
while (result > 0 && (isspace(line[result - 1])))
|
|
|
|
result--;
|
|
|
|
memcpy(buffer, line, result);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Install ff_regexp() as the find_func of `xecfg`, driven by the
 * patterns in `value`.
 *
 * `value` holds one regular expression per line, separated by '\n'.
 * A leading '!' marks an expression as negated and is not part of the
 * pattern; the last expression must not be negated.  Each expression
 * is compiled with regcomp() using `cflags`.  The function die()s on a
 * negated last expression or on a pattern that fails to compile.
 *
 * The allocated state is stored in xecfg->find_func_priv and is
 * released by xdiff_clear_find_func().
 */
void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value, int cflags)
{
	int i;
	struct ff_regs *regs;

	xecfg->find_func = ff_regexp;
	regs = xecfg->find_func_priv = xmalloc(sizeof(struct ff_regs));

	/* one expression per line: count the separators, plus one */
	for (i = 0, regs->nr = 1; value[i]; i++)
		if (value[i] == '\n')
			regs->nr++;
	ALLOC_ARRAY(regs->array, regs->nr);

	for (i = 0; i < regs->nr; i++) {
		struct ff_reg *reg = regs->array + i;
		const char *ep = strchr(value, '\n'), *expression;
		char *buffer = NULL;

		reg->negate = (*value == '!');
		if (reg->negate && i == regs->nr - 1)
			die("Last expression must not be negated: %s", value);
		if (*value == '!')
			value++;
		if (ep)
			expression = buffer = xstrndup(value, ep - value);
		else
			expression = value;
		if (regcomp(&reg->re, expression, cflags))
			die("Invalid regexp to look for hunk header: %s", expression);
		free(buffer);
		/* ep is NULL only for the last expression; nothing follows it */
		if (ep)
			value = ep + 1;
	}
}
|
|
|
|
|
|
|
|
/*
 * Undo xdiff_set_find_func(): free every compiled pattern, the pattern
 * array and the ff_regs structure, then reset find_func and
 * find_func_priv to NULL.  Does nothing when no find_func is set.
 */
void xdiff_clear_find_func(xdemitconf_t *xecfg)
{
	if (xecfg->find_func) {
		int i;
		struct ff_regs *regs = xecfg->find_func_priv;

		for (i = 0; i < regs->nr; i++)
			regfree(&regs->array[i].re);
		free(regs->array);
		free(regs);
		xecfg->find_func = NULL;
		xecfg->find_func_priv = NULL;
	}
}
|
|
|
|
|
|
|
|
/*
 * Hash the `len` bytes at `s` with xdl_hash_record(), passing `flags`
 * through unchanged.  The address handed to xdl_hash_record() is that
 * of the local parameter, so the caller's pointer is never modified.
 */
unsigned long xdiff_hash_string(const char *s, size_t len, long flags)
{
	return xdl_hash_record(&s, s + len, flags);
}
|
|
|
|
|
|
|
|
/*
 * Compare line `l1` (of `s1` bytes) with line `l2` (of `s2` bytes) by
 * forwarding all arguments, including `flags`, to xdl_recmatch() and
 * returning its result.
 */
int xdiff_compare_lines(const char *l1, long s1,
			const char *l2, long s2, long flags)
{
	return xdl_recmatch(l1, s1, l2, s2, flags);
}
|
|
|
|
|
|
|
|
int git_xmerge_style = -1;
|
|
|
|
|
|
|
|
int git_xmerge_config(const char *var, const char *value, void *cb)
|
|
|
|
{
|
|
|
|
if (!strcmp(var, "merge.conflictstyle")) {
|
|
|
|
if (!value)
|
|
|
|
die("'%s' is not a boolean", var);
|
|
|
|
if (!strcmp(value, "diff3"))
|
|
|
|
git_xmerge_style = XDL_MERGE_DIFF3;
|
|
|
|
else if (!strcmp(value, "merge"))
|
|
|
|
git_xmerge_style = 0;
|
|
|
|
else
|
|
|
|
die("unknown style '%s' given for '%s'",
|
|
|
|
value, var);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return git_default_config(var, value, cb);
|
|
|
|
}
|