|
|
|
#include "cache.h"
|
|
|
|
#include "pkt-line.h"
|
pkt-line: show packets in async processes as "sideband"
If you run "GIT_TRACE_PACKET=1 git push", you may get
confusing output like (line prefixes omitted for clarity):
packet: push< \1000eunpack ok0019ok refs/heads/master0000
packet: push< unpack ok
packet: push< ok refs/heads/master
packet: push< 0000
packet: push< 0000
Why do we see the data twice, once apparently wrapped inside
another pkt-line, and once unwrapped? Why do we get two
flush packets?
The answer is that we start an async process to demux the
sideband data. The first entry comes from the sideband
process reading the data, and the second from push itself.
Likewise, the first flush is inside the demuxed packet, and
the second is an actual sideband flush.
We can make this a bit more clear by marking the sideband
demuxer explicitly as "sideband" rather than "push". The
most elegant way to do this would be to simply call
packet_trace_identity() inside the sideband demuxer. But we
can't do that reliably, because it relies on a global
variable, which might be shared if pthreads are in use.
What we really need is thread-local storage for
packet_trace_identity. But the async code does not provide
an interface for that, and it would be messy to add it here
(we'd have to care about pthreads, initializing our
pthread_key_t ahead of time, etc).
So instead, let us just assume that any async process is
handling sideband data. That's always true now, and is
likely to remain so in the future.
The output looks like:
packet: sideband< \1000eunpack ok0019ok refs/heads/master0000
packet: push< unpack ok
packet: push< ok refs/heads/master
packet: push< 0000
packet: sideband< 0000
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
#include "run-command.h"
|
|
|
|
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
char packet_buffer[LARGE_PACKET_MAX];
|
|
|
|
static const char *packet_trace_prefix = "git";
|
|
|
|
static struct trace_key trace_packet = TRACE_KEY_INIT(PACKET);
|
|
|
|
static struct trace_key trace_pack = TRACE_KEY_INIT(PACKFILE);
|
|
|
|
|
|
|
|
void packet_trace_identity(const char *prog)
|
|
|
|
{
|
|
|
|
packet_trace_prefix = xstrdup(prog);
|
|
|
|
}
|
|
|
|
|
pkt-line: show packets in async processes as "sideband"
If you run "GIT_TRACE_PACKET=1 git push", you may get
confusing output like (line prefixes omitted for clarity):
packet: push< \1000eunpack ok0019ok refs/heads/master0000
packet: push< unpack ok
packet: push< ok refs/heads/master
packet: push< 0000
packet: push< 0000
Why do we see the data twice, once apparently wrapped inside
another pkt-line, and once unwrapped? Why do we get two
flush packets?
The answer is that we start an async process to demux the
sideband data. The first entry comes from the sideband
process reading the data, and the second from push itself.
Likewise, the first flush is inside the demuxed packet, and
the second is an actual sideband flush.
We can make this a bit more clear by marking the sideband
demuxer explicitly as "sideband" rather than "push". The
most elegant way to do this would be to simply call
packet_trace_identity() inside the sideband demuxer. But we
can't do that reliably, because it relies on a global
variable, which might be shared if pthreads are in use.
What we really need is thread-local storage for
packet_trace_identity. But the async code does not provide
an interface for that, and it would be messy to add it here
(we'd have to care about pthreads, initializing our
pthread_key_t ahead of time, etc).
So instead, let us just assume that any async process is
handling sideband data. That's always true now, and is
likely to remain so in the future.
The output looks like:
packet: sideband< \1000eunpack ok0019ok refs/heads/master0000
packet: push< unpack ok
packet: push< ok refs/heads/master
packet: push< 0000
packet: sideband< 0000
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
static const char *get_trace_prefix(void)
|
|
|
|
{
|
|
|
|
return in_async() ? "sideband" : packet_trace_prefix;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int packet_trace_pack(const char *buf, unsigned int len, int sideband)
|
|
|
|
{
|
|
|
|
if (!sideband) {
|
|
|
|
trace_verbatim(&trace_pack, buf, len);
|
|
|
|
return 1;
|
|
|
|
} else if (len && *buf == '\1') {
|
|
|
|
trace_verbatim(&trace_pack, buf + 1, len - 1);
|
|
|
|
return 1;
|
|
|
|
} else {
|
|
|
|
/* it's another non-pack sideband */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void packet_trace(const char *buf, unsigned int len, int write)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct strbuf out;
|
|
|
|
static int in_pack, sideband;
|
|
|
|
|
|
|
|
if (!trace_want(&trace_packet) && !trace_want(&trace_pack))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (in_pack) {
|
|
|
|
if (packet_trace_pack(buf, len, sideband))
|
|
|
|
return;
|
|
|
|
} else if (starts_with(buf, "PACK") || starts_with(buf, "\1PACK")) {
|
|
|
|
in_pack = 1;
|
|
|
|
sideband = *buf == '\1';
|
|
|
|
packet_trace_pack(buf, len, sideband);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make a note in the human-readable trace that the pack data
|
|
|
|
* started.
|
|
|
|
*/
|
|
|
|
buf = "PACK ...";
|
|
|
|
len = strlen(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!trace_want(&trace_packet))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* +32 is just a guess for header + quoting */
|
|
|
|
strbuf_init(&out, len+32);
|
|
|
|
|
|
|
|
strbuf_addf(&out, "packet: %12s%c ",
|
pkt-line: show packets in async processes as "sideband"
If you run "GIT_TRACE_PACKET=1 git push", you may get
confusing output like (line prefixes omitted for clarity):
packet: push< \1000eunpack ok0019ok refs/heads/master0000
packet: push< unpack ok
packet: push< ok refs/heads/master
packet: push< 0000
packet: push< 0000
Why do we see the data twice, once apparently wrapped inside
another pkt-line, and once unwrapped? Why do we get two
flush packets?
The answer is that we start an async process to demux the
sideband data. The first entry comes from the sideband
process reading the data, and the second from push itself.
Likewise, the first flush is inside the demuxed packet, and
the second is an actual sideband flush.
We can make this a bit more clear by marking the sideband
demuxer explicitly as "sideband" rather than "push". The
most elegant way to do this would be to simply call
packet_trace_identity() inside the sideband demuxer. But we
can't do that reliably, because it relies on a global
variable, which might be shared if pthreads are in use.
What we really need is thread-local storage for
packet_trace_identity. But the async code does not provide
an interface for that, and it would be messy to add it here
(we'd have to care about pthreads, initializing our
pthread_key_t ahead of time, etc).
So instead, let us just assume that any async process is
handling sideband data. That's always true now, and is
likely to remain so in the future.
The output looks like:
packet: sideband< \1000eunpack ok0019ok refs/heads/master0000
packet: push< unpack ok
packet: push< ok refs/heads/master
packet: push< 0000
packet: sideband< 0000
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
get_trace_prefix(), write ? '>' : '<');
|
|
|
|
|
|
|
|
/* XXX we should really handle printable utf8 */
|
|
|
|
for (i = 0; i < len; i++) {
|
|
|
|
/* suppress newlines */
|
|
|
|
if (buf[i] == '\n')
|
|
|
|
continue;
|
|
|
|
if (buf[i] >= 0x20 && buf[i] <= 0x7e)
|
|
|
|
strbuf_addch(&out, buf[i]);
|
|
|
|
else
|
|
|
|
strbuf_addf(&out, "\\%o", buf[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_addch(&out, '\n');
|
|
|
|
trace_strbuf(&trace_packet, &out);
|
|
|
|
strbuf_release(&out);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we buffered things up above (we don't, but we should),
|
|
|
|
* we'd flush it here
|
|
|
|
*/
|
|
|
|
void packet_flush(int fd)
|
|
|
|
{
|
|
|
|
packet_trace("0000", 4, 1);
|
|
|
|
write_or_die(fd, "0000", 4);
|
|
|
|
}
|
|
|
|
|
|
|
|
int packet_flush_gently(int fd)
|
|
|
|
{
|
|
|
|
packet_trace("0000", 4, 1);
|
|
|
|
if (write_in_full(fd, "0000", 4) == 4)
|
|
|
|
return 0;
|
|
|
|
return error("flush packet write failed");
|
|
|
|
}
|
|
|
|
|
|
|
|
void packet_buf_flush(struct strbuf *buf)
|
|
|
|
{
|
|
|
|
packet_trace("0000", 4, 1);
|
|
|
|
strbuf_add(buf, "0000", 4);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void set_packet_header(char *buf, const int size)
|
|
|
|
{
|
|
|
|
static char hexchar[] = "0123456789abcdef";
|
|
|
|
|
|
|
|
#define hex(a) (hexchar[(a) & 15])
|
|
|
|
buf[0] = hex(size >> 12);
|
|
|
|
buf[1] = hex(size >> 8);
|
|
|
|
buf[2] = hex(size >> 4);
|
|
|
|
buf[3] = hex(size);
|
|
|
|
#undef hex
|
|
|
|
}
|
|
|
|
|
|
|
|
static void format_packet(struct strbuf *out, const char *fmt, va_list args)
|
|
|
|
{
|
pkt-line: allow writing of LARGE_PACKET_MAX buffers
When we send out pkt-lines with refnames, we use a static
1000-byte buffer. This means that the maximum size of a ref
over the git protocol is around 950 bytes (the exact size
depends on the protocol line being written, but figure on a sha1
plus some boilerplate).
This is enough for any sane workflow, but occasionally odd
things happen (e.g., a bug may create a ref "foo/foo/foo/..."
accidentally). With the current code, you cannot even use
"push" to delete such a ref from a remote.
Let's switch to using a strbuf, with a hard-limit of
LARGE_PACKET_MAX (which is specified by the protocol). This
matches the size of the readers, as of 74543a0 (pkt-line:
provide a LARGE_PACKET_MAX static buffer, 2013-02-20).
Versions of git older than that will complain about our
large packets, but it's really no worse than the current
behavior. Right now the sender barfs with "impossibly long
line" trying to send the packet, and afterwards the reader
will barf with "protocol error: bad line length %d", which
is arguably better anyway.
Note that we're not really _solving_ the problem here, but
just bumping the limits. In theory, the length of a ref is
unbounded, and pkt-line can only represent sizes up to
65531 bytes. So we are just bumping the limit, not removing
it. But hopefully 64K should be enough for anyone.
As a bonus, by using a strbuf for the formatting we can
eliminate an unnecessary copy in format_buf_write.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
size_t orig_len, n;
|
|
|
|
|
pkt-line: allow writing of LARGE_PACKET_MAX buffers
When we send out pkt-lines with refnames, we use a static
1000-byte buffer. This means that the maximum size of a ref
over the git protocol is around 950 bytes (the exact size
depends on the protocol line being written, but figure on a sha1
plus some boilerplate).
This is enough for any sane workflow, but occasionally odd
things happen (e.g., a bug may create a ref "foo/foo/foo/..."
accidentally). With the current code, you cannot even use
"push" to delete such a ref from a remote.
Let's switch to using a strbuf, with a hard-limit of
LARGE_PACKET_MAX (which is specified by the protocol). This
matches the size of the readers, as of 74543a0 (pkt-line:
provide a LARGE_PACKET_MAX static buffer, 2013-02-20).
Versions of git older than that will complain about our
large packets, but it's really no worse than the current
behavior. Right now the sender barfs with "impossibly long
line" trying to send the packet, and afterwards the reader
will barf with "protocol error: bad line length %d", which
is arguably better anyway.
Note that we're not really _solving_ the problem here, but
just bumping the limits. In theory, the length of a ref is
unbounded, and pkt-line can only represent sizes up to
65531 bytes. So we are just bumping the limit, not removing
it. But hopefully 64K should be enough for anyone.
As a bonus, by using a strbuf for the formatting we can
eliminate an unnecessary copy in format_buf_write.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
orig_len = out->len;
|
|
|
|
strbuf_addstr(out, "0000");
|
|
|
|
strbuf_vaddf(out, fmt, args);
|
|
|
|
n = out->len - orig_len;
|
|
|
|
|
|
|
|
if (n > LARGE_PACKET_MAX)
|
|
|
|
die("protocol error: impossibly long line");
|
pkt-line: allow writing of LARGE_PACKET_MAX buffers
When we send out pkt-lines with refnames, we use a static
1000-byte buffer. This means that the maximum size of a ref
over the git protocol is around 950 bytes (the exact size
depends on the protocol line being written, but figure on a sha1
plus some boilerplate).
This is enough for any sane workflow, but occasionally odd
things happen (e.g., a bug may create a ref "foo/foo/foo/..."
accidentally). With the current code, you cannot even use
"push" to delete such a ref from a remote.
Let's switch to using a strbuf, with a hard-limit of
LARGE_PACKET_MAX (which is specified by the protocol). This
matches the size of the readers, as of 74543a0 (pkt-line:
provide a LARGE_PACKET_MAX static buffer, 2013-02-20).
Versions of git older than that will complain about our
large packets, but it's really no worse than the current
behavior. Right now the sender barfs with "impossibly long
line" trying to send the packet, and afterwards the reader
will barf with "protocol error: bad line length %d", which
is arguably better anyway.
Note that we're not really _solving_ the problem here, but
just bumping the limits. In theory, the length of a ref is
unbounded, and pkt-line can only represent sizes up to
65531 bytes. So we are just bumping the limit, not removing
it. But hopefully 64K should be enough for anyone.
As a bonus, by using a strbuf for the formatting we can
eliminate an unnecessary copy in format_buf_write.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
|
|
|
|
set_packet_header(&out->buf[orig_len], n);
|
pkt-line: allow writing of LARGE_PACKET_MAX buffers
When we send out pkt-lines with refnames, we use a static
1000-byte buffer. This means that the maximum size of a ref
over the git protocol is around 950 bytes (the exact size
depends on the protocol line being written, but figure on a sha1
plus some boilerplate).
This is enough for any sane workflow, but occasionally odd
things happen (e.g., a bug may create a ref "foo/foo/foo/..."
accidentally). With the current code, you cannot even use
"push" to delete such a ref from a remote.
Let's switch to using a strbuf, with a hard-limit of
LARGE_PACKET_MAX (which is specified by the protocol). This
matches the size of the readers, as of 74543a0 (pkt-line:
provide a LARGE_PACKET_MAX static buffer, 2013-02-20).
Versions of git older than that will complain about our
large packets, but it's really no worse than the current
behavior. Right now the sender barfs with "impossibly long
line" trying to send the packet, and afterwards the reader
will barf with "protocol error: bad line length %d", which
is arguably better anyway.
Note that we're not really _solving_ the problem here, but
just bumping the limits. In theory, the length of a ref is
unbounded, and pkt-line can only represent sizes up to
65531 bytes. So we are just bumping the limit, not removing
it. But hopefully 64K should be enough for anyone.
As a bonus, by using a strbuf for the formatting we can
eliminate an unnecessary copy in format_buf_write.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
packet_trace(out->buf + orig_len + 4, n - 4, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int packet_write_fmt_1(int fd, int gently,
|
|
|
|
const char *fmt, va_list args)
|
|
|
|
{
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
ssize_t count;
|
|
|
|
|
|
|
|
format_packet(&buf, fmt, args);
|
|
|
|
count = write_in_full(fd, buf.buf, buf.len);
|
|
|
|
if (count == buf.len)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!gently) {
|
|
|
|
check_pipe(errno);
|
|
|
|
die_errno("packet write with format failed");
|
|
|
|
}
|
|
|
|
return error("packet write with format failed");
|
|
|
|
}
|
|
|
|
|
|
|
|
void packet_write_fmt(int fd, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
packet_write_fmt_1(fd, 0, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
int packet_write_fmt_gently(int fd, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
int status;
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
status = packet_write_fmt_1(fd, 1, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int packet_write_gently(const int fd_out, const char *buf, size_t size)
|
|
|
|
{
|
|
|
|
static char packet_write_buffer[LARGE_PACKET_MAX];
|
|
|
|
size_t packet_size;
|
|
|
|
|
|
|
|
if (size > sizeof(packet_write_buffer) - 4)
|
|
|
|
return error("packet write failed - data exceeds max packet size");
|
|
|
|
|
|
|
|
packet_trace(buf, size, 1);
|
|
|
|
packet_size = size + 4;
|
|
|
|
set_packet_header(packet_write_buffer, packet_size);
|
|
|
|
memcpy(packet_write_buffer + 4, buf, size);
|
|
|
|
if (write_in_full(fd_out, packet_write_buffer, packet_size) == packet_size)
|
|
|
|
return 0;
|
|
|
|
return error("packet write failed");
|
|
|
|
}
|
|
|
|
|
|
|
|
void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
pkt-line: allow writing of LARGE_PACKET_MAX buffers
When we send out pkt-lines with refnames, we use a static
1000-byte buffer. This means that the maximum size of a ref
over the git protocol is around 950 bytes (the exact size
depends on the protocol line being written, but figure on a sha1
plus some boilerplate).
This is enough for any sane workflow, but occasionally odd
things happen (e.g., a bug may create a ref "foo/foo/foo/..."
accidentally). With the current code, you cannot even use
"push" to delete such a ref from a remote.
Let's switch to using a strbuf, with a hard-limit of
LARGE_PACKET_MAX (which is specified by the protocol). This
matches the size of the readers, as of 74543a0 (pkt-line:
provide a LARGE_PACKET_MAX static buffer, 2013-02-20).
Versions of git older than that will complain about our
large packets, but it's really no worse than the current
behavior. Right now the sender barfs with "impossibly long
line" trying to send the packet, and afterwards the reader
will barf with "protocol error: bad line length %d", which
is arguably better anyway.
Note that we're not really _solving_ the problem here, but
just bumping the limits. In theory, the length of a ref is
unbounded, and pkt-line can only represent sizes up to
65531 bytes. So we are just bumping the limit, not removing
it. But hopefully 64K should be enough for anyone.
As a bonus, by using a strbuf for the formatting we can
eliminate an unnecessary copy in format_buf_write.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
10 years ago
|
|
|
format_packet(buf, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
int write_packetized_from_fd(int fd_in, int fd_out)
|
|
|
|
{
|
|
|
|
static char buf[LARGE_PACKET_DATA_MAX];
|
|
|
|
int err = 0;
|
|
|
|
ssize_t bytes_to_write;
|
|
|
|
|
|
|
|
while (!err) {
|
|
|
|
bytes_to_write = xread(fd_in, buf, sizeof(buf));
|
|
|
|
if (bytes_to_write < 0)
|
|
|
|
return COPY_READ_ERROR;
|
|
|
|
if (bytes_to_write == 0)
|
|
|
|
break;
|
|
|
|
err = packet_write_gently(fd_out, buf, bytes_to_write);
|
|
|
|
}
|
|
|
|
if (!err)
|
|
|
|
err = packet_flush_gently(fd_out);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
int write_packetized_from_buf(const char *src_in, size_t len, int fd_out)
|
|
|
|
{
|
|
|
|
int err = 0;
|
|
|
|
size_t bytes_written = 0;
|
|
|
|
size_t bytes_to_write;
|
|
|
|
|
|
|
|
while (!err) {
|
|
|
|
if ((len - bytes_written) > LARGE_PACKET_DATA_MAX)
|
|
|
|
bytes_to_write = LARGE_PACKET_DATA_MAX;
|
|
|
|
else
|
|
|
|
bytes_to_write = len - bytes_written;
|
|
|
|
if (bytes_to_write == 0)
|
|
|
|
break;
|
|
|
|
err = packet_write_gently(fd_out, src_in + bytes_written, bytes_to_write);
|
|
|
|
bytes_written += bytes_to_write;
|
|
|
|
}
|
|
|
|
if (!err)
|
|
|
|
err = packet_flush_gently(fd_out);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
static int get_packet_data(int fd, char **src_buf, size_t *src_size,
|
|
|
|
void *dst, unsigned size, int options)
|
|
|
|
{
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
if (fd >= 0 && src_buf && *src_buf)
|
|
|
|
die("BUG: multiple sources given to packet_read");
|
|
|
|
|
|
|
|
/* Read up to "size" bytes from our source, whatever it is. */
|
|
|
|
if (src_buf && *src_buf) {
|
|
|
|
ret = size < *src_size ? size : *src_size;
|
|
|
|
memcpy(dst, *src_buf, ret);
|
|
|
|
*src_buf += ret;
|
|
|
|
*src_size -= ret;
|
|
|
|
} else {
|
|
|
|
ret = read_in_full(fd, dst, size);
|
|
|
|
if (ret < 0)
|
|
|
|
die_errno("read error");
|
|
|
|
}
|
|
|
|
|
|
|
|
/* And complain if we didn't get enough bytes to satisfy the read. */
|
|
|
|
if (ret < size) {
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
if (options & PACKET_READ_GENTLE_ON_EOF)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
die("The remote end hung up unexpectedly");
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int packet_length(const char *linelen)
|
|
|
|
{
|
|
|
|
int val = hex2chr(linelen);
|
|
|
|
return (val < 0) ? val : (val << 8) | hex2chr(linelen + 2);
|
|
|
|
}
|
|
|
|
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
int packet_read(int fd, char **src_buf, size_t *src_len,
|
|
|
|
char *buffer, unsigned size, int options)
|
|
|
|
{
|
|
|
|
int len, ret;
|
|
|
|
char linelen[4];
|
|
|
|
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
ret = get_packet_data(fd, src_buf, src_len, linelen, 4, options);
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
|
|
|
len = packet_length(linelen);
|
|
|
|
if (len < 0)
|
|
|
|
die("protocol error: bad line length character: %.4s", linelen);
|
|
|
|
if (!len) {
|
|
|
|
packet_trace("0000", 4, 0);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
len -= 4;
|
|
|
|
if (len >= size)
|
|
|
|
die("protocol error: bad line length %d", len);
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
ret = get_packet_data(fd, src_buf, src_len, buffer, len, options);
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
pkt-line: teach packet_read_line to chomp newlines
The packets sent during ref negotiation are all terminated
by newline; even though the code to chomp these newlines is
short, we end up doing it in a lot of places.
This patch teaches packet_read_line to auto-chomp the
trailing newline; this lets us get rid of a lot of inline
chomping code.
As a result, some call-sites which are not reading
line-oriented data (e.g., when reading chunks of packfiles
alongside sideband) transition away from packet_read_line to
the generic packet_read interface. This patch converts all
of the existing callsites.
Since the function signature of packet_read_line does not
change (but its behavior does), there is a possibility of
new callsites being introduced in later commits, silently
introducing an incompatibility. However, since a later
patch in this series will change the signature, such a
commit would have to be merged directly into this commit,
not to the tip of the series; we can therefore ignore the
issue.
This is an internal cleanup and should produce no change of
behavior in the normal case. However, there is one corner
case to note. Callers of packet_read_line have never been
able to tell the difference between a flush packet ("0000")
and an empty packet ("0004"), as both cause packet_read_line
to return a length of 0. Readers treat them identically,
even though Documentation/technical/protocol-common.txt says
we must not; it also says that implementations should not
send an empty pkt-line.
By stripping out the newline before the result gets to the
caller, we will now treat the newline-only packet ("0005\n")
the same as an empty packet, which in turn gets treated like
a flush packet. In practice this doesn't matter, as neither
empty nor newline-only packets are part of git's protocols
(at least not for the line-oriented bits, and readers who
are not expecting line-oriented packets will be calling
packet_read directly, anyway). But even if we do decide to
care about the distinction later, it is orthogonal to this
patch. The right place to tighten would be to stop treating
empty packets as flush packets, and this change does not
make doing so any harder.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
|
|
|
|
if ((options & PACKET_READ_CHOMP_NEWLINE) &&
|
|
|
|
len && buffer[len-1] == '\n')
|
|
|
|
len--;
|
|
|
|
|
|
|
|
buffer[len] = 0;
|
|
|
|
packet_trace(buffer, len, 0);
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
static char *packet_read_line_generic(int fd,
|
|
|
|
char **src, size_t *src_len,
|
|
|
|
int *dst_len)
|
|
|
|
{
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
int len = packet_read(fd, src, src_len,
|
|
|
|
packet_buffer, sizeof(packet_buffer),
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
PACKET_READ_CHOMP_NEWLINE);
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
if (dst_len)
|
|
|
|
*dst_len = len;
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
return len ? packet_buffer : NULL;
|
|
|
|
}
|
|
|
|
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
char *packet_read_line(int fd, int *len_p)
|
|
|
|
{
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
return packet_read_line_generic(fd, NULL, NULL, len_p);
|
|
|
|
}
|
|
|
|
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL
already, and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
char *packet_read_line_buf(char **src, size_t *src_len, int *dst_len)
|
|
|
|
{
|
|
|
|
return packet_read_line_generic(-1, src, src_len, dst_len);
|
|
|
|
}
|
|
|
|
|
|
|
|
ssize_t read_packetized_to_strbuf(int fd_in, struct strbuf *sb_out)
|
|
|
|
{
|
|
|
|
int packet_len;
|
|
|
|
|
|
|
|
size_t orig_len = sb_out->len;
|
|
|
|
size_t orig_alloc = sb_out->alloc;
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
strbuf_grow(sb_out, LARGE_PACKET_DATA_MAX);
|
|
|
|
packet_len = packet_read(fd_in, NULL, NULL,
|
|
|
|
/* strbuf_grow() above always allocates one extra byte to
|
|
|
|
* store a '\0' at the end of the string. packet_read()
|
|
|
|
* writes a '\0' extra byte at the end, too. Let it know
|
|
|
|
* that there is already room for the extra byte.
|
|
|
|
*/
|
|
|
|
sb_out->buf + sb_out->len, LARGE_PACKET_DATA_MAX+1,
|
|
|
|
PACKET_READ_GENTLE_ON_EOF);
|
|
|
|
if (packet_len <= 0)
|
|
|
|
break;
|
|
|
|
sb_out->len += packet_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (packet_len < 0) {
|
|
|
|
if (orig_alloc == 0)
|
|
|
|
strbuf_release(sb_out);
|
|
|
|
else
|
|
|
|
strbuf_setlen(sb_out, orig_len);
|
|
|
|
return packet_len;
|
|
|
|
}
|
|
|
|
return sb_out->len - orig_len;
|
|
|
|
}
|