|
|
|
#ifndef PKTLINE_H
|
|
|
|
#define PKTLINE_H
|
|
|
|
|
|
|
|
#include "git-compat-util.h"
|
|
|
|
#include "strbuf.h"
|
|
|
|
#include "sideband.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write a packetized stream, where each line is preceded by
|
|
|
|
* its length (including the header) as a 4-byte hex number.
|
|
|
|
* A length of 'zero' means end of stream (and a length of 1-3
|
|
|
|
* would be an error).
|
|
|
|
*
|
|
|
|
* This is all pretty stupid, but we use this packetized line
|
|
|
|
* format to make a streaming format possible without ever
|
|
|
|
* over-running the read buffers. That way we'll never read
|
|
|
|
* into what might be the pack data (which should go to another
|
|
|
|
* process entirely).
|
|
|
|
*
|
|
|
|
* The writing side could use stdio, but since the reading
|
|
|
|
* side can't, we stay with pure read/write interfaces.
|
|
|
|
*/
|
|
|
|
void packet_flush(int fd);
|
|
|
|
void packet_delim(int fd);
|
|
|
|
void packet_response_end(int fd);
|
|
|
|
void packet_write_fmt(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
|
|
|
|
void packet_buf_flush(struct strbuf *buf);
|
|
|
|
void packet_buf_delim(struct strbuf *buf);
|
|
|
|
void set_packet_header(char *buf, int size);
|
|
|
|
void packet_write(int fd_out, const char *buf, size_t size);
|
|
|
|
void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
|
|
|
|
void packet_buf_write_len(struct strbuf *buf, const char *data, size_t len);
|
|
|
|
int packet_flush_gently(int fd);
|
|
|
|
int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
|
|
|
|
int write_packetized_from_fd(int fd_in, int fd_out);
|
|
|
|
int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
|
|
|
|
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
/*
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL,
and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
* Read a packetized line into the buffer, which must be at least size bytes
|
|
|
|
* long. The return value specifies the number of bytes read into the buffer.
|
|
|
|
*
|
|
|
|
* If src_buffer and *src_buffer are not NULL, it should point to a buffer
|
|
|
|
* containing the packet data to parse, of at least *src_len bytes. After the
|
|
|
|
* function returns, *src_buffer will be incremented and *src_len decremented by the
|
|
|
|
* number of bytes consumed.
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL,
and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
*
|
|
|
|
* If src_buffer (or *src_buffer) is NULL, then data is read from the
|
|
|
|
* descriptor "fd".
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
*
|
|
|
|
* If options does not contain PACKET_READ_GENTLE_ON_EOF, we will die under any
|
|
|
|
* of the following conditions:
|
|
|
|
*
|
|
|
|
* 1. Read error from descriptor.
|
|
|
|
*
|
|
|
|
* 2. Protocol error from the remote (e.g., bogus length characters).
|
|
|
|
*
|
|
|
|
* 3. Receiving a packet larger than "size" bytes.
|
|
|
|
*
|
|
|
|
* 4. Truncated output from the remote (e.g., we expected a packet but got
|
|
|
|
* EOF, or we got a partial packet followed by EOF).
|
|
|
|
*
|
|
|
|
* If options does contain PACKET_READ_GENTLE_ON_EOF, we will not die on
|
|
|
|
* condition 4 (truncated input), but instead return -1. However, we will still
|
|
|
|
* die for the other 3 conditions.
|
pkt-line: teach packet_read_line to chomp newlines
The packets sent during ref negotiation are all terminated
by newline; even though the code to chomp these newlines is
short, we end up doing it in a lot of places.
This patch teaches packet_read_line to auto-chomp the
trailing newline; this lets us get rid of a lot of inline
chomping code.
As a result, some call-sites which are not reading
line-oriented data (e.g., when reading chunks of packfiles
alongside sideband) transition away from packet_read_line to
the generic packet_read interface. This patch converts all
of the existing callsites.
Since the function signature of packet_read_line does not
change (but its behavior does), there is a possibility of
new callsites being introduced in later commits, silently
introducing an incompatibility. However, since a later
patch in this series will change the signature, such a
commit would have to be merged directly into this commit,
not to the tip of the series; we can therefore ignore the
issue.
This is an internal cleanup and should produce no change of
behavior in the normal case. However, there is one corner
case to note. Callers of packet_read_line have never been
able to tell the difference between a flush packet ("0000")
and an empty packet ("0004"), as both cause packet_read_line
to return a length of 0. Readers treat them identically,
even though Documentation/technical/protocol-common.txt says
we must not; it also says that implementations should not
send an empty pkt-line.
By stripping out the newline before the result gets to the
caller, we will now treat the newline-only packet ("0005\n")
the same as an empty packet, which in turn gets treated like
a flush packet. In practice this doesn't matter, as neither
empty nor newline-only packets are part of git's protocols
(at least not for the line-oriented bits, and readers who
are not expecting line-oriented packets will be calling
packet_read directly, anyway). But even if we do decide to
care about the distinction later, it is orthogonal to this
patch. The right place to tighten would be to stop treating
empty packets as flush packets, and this change does not
make doing so any harder.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
*
|
|
|
|
* If options contains PACKET_READ_CHOMP_NEWLINE, a trailing newline (if
|
|
|
|
* present) is removed from the buffer before returning.
|
pack-protocol.txt: accept error packets in any context
In the Git pack protocol definition, an error packet may appear only in
a certain context. However, servers can face a runtime error (e.g. I/O
error) at an arbitrary timing. This patch changes the protocol to allow
an error packet to be sent instead of any packet.
Without this protocol spec change, when a server cannot process a
request, there's no way to tell that to a client. Since the server
cannot produce a valid response, it would be forced to cut a connection
without telling why. With this protocol spec change, the server can be
more gentle in this situation. An old client may see these error packets
as an unexpected packet, but this is not worse than having an unexpected
EOF.
Following this protocol spec change, the error packet handling code is
moved to pkt-line.c. Implementation wise, this implementation uses
pkt-line to communicate with a subprocess. Since this is not a part of
Git protocol, it's possible that a packet that is not supposed to be an
error packet is mistakenly parsed as an error packet. This error packet
handling is enabled only for the Git pack protocol parsing code
considering this.
Signed-off-by: Masaya Suzuki <masayasuzuki@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
*
|
|
|
|
* If options contains PACKET_READ_DIE_ON_ERR_PACKET, it dies when it sees an
|
|
|
|
* ERR packet.
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
*/
|
pack-protocol.txt: accept error packets in any context
In the Git pack protocol definition, an error packet may appear only in
a certain context. However, servers can face a runtime error (e.g. I/O
error) at an arbitrary timing. This patch changes the protocol to allow
an error packet to be sent instead of any packet.
Without this protocol spec change, when a server cannot process a
request, there's no way to tell that to a client. Since the server
cannot produce a valid response, it would be forced to cut a connection
without telling why. With this protocol spec change, the server can be
more gentle in this situation. An old client may see these error packets
as an unexpected packet, but this is not worse than having an unexpected
EOF.
Following this protocol spec change, the error packet handling code is
moved to pkt-line.c. Implementation wise, this implementation uses
pkt-line to communicate with a subprocess. Since this is not a part of
Git protocol, it's possible that a packet that is not supposed to be an
error packet is mistakenly parsed as an error packet. This error packet
handling is enabled only for the Git pack protocol parsing code
considering this.
Signed-off-by: Masaya Suzuki <masayasuzuki@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
6 years ago
|
|
|
#define PACKET_READ_GENTLE_ON_EOF (1u<<0)
|
|
|
|
#define PACKET_READ_CHOMP_NEWLINE (1u<<1)
|
|
|
|
#define PACKET_READ_DIE_ON_ERR_PACKET (1u<<2)
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL,
and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
int packet_read(int fd, char **src_buffer, size_t *src_len, char
|
|
|
|
*buffer, unsigned size, int options);
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
|
|
|
|
/*
|
|
|
|
* Convert a four hex digit packet line length header into its numeric
|
|
|
|
* representation.
|
|
|
|
*
|
|
|
|
* If lenbuf_hex contains non-hex characters, return -1. Otherwise, return the
|
|
|
|
* numeric value of the length header.
|
|
|
|
*/
|
|
|
|
int packet_length(const char lenbuf_hex[4]);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Read a packetized line into a buffer like the 'packet_read()' function but
|
|
|
|
* returns an 'enum packet_read_status' which indicates the status of the read.
|
|
|
|
* The number of bytes read will be assigned to *pktlen if the status of the
|
|
|
|
* read was 'PACKET_READ_NORMAL'.
|
|
|
|
*/
|
|
|
|
/* Status codes reported by packet_read_with_status(). */
enum packet_read_status {
|
|
|
|
PACKET_READ_EOF, /* presumably: input ended before a packet was read -- confirm in pkt-line.c */
|
|
|
|
PACKET_READ_NORMAL, /* a packet was read; its byte count is stored in *pktlen */
|
|
|
|
PACKET_READ_FLUSH, /* presumably: a flush packet was seen (cf. packet_flush()) -- confirm */
|
|
|
|
PACKET_READ_DELIM, /* presumably: a delimiter packet was seen (cf. packet_delim()) -- confirm */
|
|
|
|
PACKET_READ_RESPONSE_END, /* presumably: a response-end packet was seen (cf. packet_response_end()) -- confirm */
|
|
|
|
};
|
|
|
|
enum packet_read_status packet_read_with_status(int fd, char **src_buffer,
|
|
|
|
size_t *src_len, char *buffer,
|
|
|
|
unsigned size, int *pktlen,
|
|
|
|
int options);
|
|
|
|
|
pkt-line: teach packet_read_line to chomp newlines
The packets sent during ref negotiation are all terminated
by newline; even though the code to chomp these newlines is
short, we end up doing it in a lot of places.
This patch teaches packet_read_line to auto-chomp the
trailing newline; this lets us get rid of a lot of inline
chomping code.
As a result, some call-sites which are not reading
line-oriented data (e.g., when reading chunks of packfiles
alongside sideband) transition away from packet_read_line to
the generic packet_read interface. This patch converts all
of the existing callsites.
Since the function signature of packet_read_line does not
change (but its behavior does), there is a possibility of
new callsites being introduced in later commits, silently
introducing an incompatibility. However, since a later
patch in this series will change the signature, such a
commit would have to be merged directly into this commit,
not to the tip of the series; we can therefore ignore the
issue.
This is an internal cleanup and should produce no change of
behavior in the normal case. However, there is one corner
case to note. Callers of packet_read_line have never been
able to tell the difference between a flush packet ("0000")
and an empty packet ("0004"), as both cause packet_read_line
to return a length of 0. Readers treat them identically,
even though Documentation/technical/protocol-common.txt says
we must not; it also says that implementations should not
send an empty pkt-line.
By stripping out the newline before the result gets to the
caller, we will now treat the newline-only packet ("0005\n")
the same as an empty packet, which in turn gets treated like
a flush packet. In practice this doesn't matter, as neither
empty nor newline-only packets are part of git's protocols
(at least not for the line-oriented bits, and readers who
are not expecting line-oriented packets will be calling
packet_read directly, anyway). But even if we do decide to
care about the distinction later, it is orthogonal to this
patch. The right place to tighten would be to stop treating
empty packets as flush packets, and this change does not
make doing so any harder.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
/*
|
|
|
|
* Convenience wrapper for packet_read that is not gentle, and sets the
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
* CHOMP_NEWLINE option. The return value is NULL for a flush packet,
|
|
|
|
* and otherwise points to a static buffer (that may be overwritten by
|
|
|
|
* subsequent calls). If the size parameter is not NULL, the length of the
|
|
|
|
* packet is written to it.
|
pkt-line: teach packet_read_line to chomp newlines
The packets sent during ref negotiation are all terminated
by newline; even though the code to chomp these newlines is
short, we end up doing it in a lot of places.
This patch teaches packet_read_line to auto-chomp the
trailing newline; this lets us get rid of a lot of inline
chomping code.
As a result, some call-sites which are not reading
line-oriented data (e.g., when reading chunks of packfiles
alongside sideband) transition away from packet_read_line to
the generic packet_read interface. This patch converts all
of the existing callsites.
Since the function signature of packet_read_line does not
change (but its behavior does), there is a possibility of
new callsites being introduced in later commits, silently
introducing an incompatibility. However, since a later
patch in this series will change the signature, such a
commit would have to be merged directly into this commit,
not to the tip of the series; we can therefore ignore the
issue.
This is an internal cleanup and should produce no change of
behavior in the normal case. However, there is one corner
case to note. Callers of packet_read_line have never been
able to tell the difference between a flush packet ("0000")
and an empty packet ("0004"), as both cause packet_read_line
to return a length of 0. Readers treat them identically,
even though Documentation/technical/protocol-common.txt says
we must not; it also says that implementations should not
send an empty pkt-line.
By stripping out the newline before the result gets to the
caller, we will now treat the newline-only packet ("0005\n")
the same as an empty packet, which in turn gets treated like
a flush packet. In practice this doesn't matter, as neither
empty nor newline-only packets are part of git's protocols
(at least not for the line-oriented bits, and readers who
are not expecting line-oriented packets will be calling
packet_read directly, anyway). But even if we do decide to
care about the distinction later, it is orthogonal to this
patch. The right place to tighten would be to stop treating
empty packets as flush packets, and this change does not
make doing so any harder.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
*/
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
char *packet_read_line(int fd, int *size);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Convenience wrapper for packet_read that sets the PACKET_READ_GENTLE_ON_EOF
|
|
|
|
* and CHOMP_NEWLINE options. The return value specifies the number of bytes
|
|
|
|
* read into the buffer or -1 on truncated input. If the dst_line parameter
|
|
|
|
* is not NULL, *dst_line is set to NULL for a flush packet or when the number of
|
|
|
|
* bytes copied is zero, and otherwise points to a static buffer (that may be
|
|
|
|
* overwritten by subsequent calls). If the size parameter is not NULL, the
|
|
|
|
* length of the packet is written to it.
|
|
|
|
*/
|
|
|
|
int packet_read_line_gently(int fd, int *size, char **dst_line);
|
|
|
|
|
pkt-line: share buffer/descriptor reading implementation
The packet_read function reads from a descriptor. The
packet_get_line function is similar, but reads from an
in-memory buffer, and uses a completely separate
implementation. This patch teaches the generic packet_read
function to accept either source, and we can do away with
packet_get_line's implementation.
There are two other differences to account for between the
old and new functions. The first is that we used to read
into a strbuf, but now read into a fixed size buffer. The
only two callers are fine with that, and in fact it
simplifies their code, since they can use the same
static-buffer interface as the rest of the packet_read_line
callers (and we provide a similar convenience wrapper for
reading from a buffer rather than a descriptor).
This is technically an externally-visible behavior change in
that we used to accept arbitrary sized packets up to 65532
bytes, and now cap out at LARGE_PACKET_MAX, 65520. In
practice this doesn't matter, as we use it only for parsing
smart-http headers (of which there is exactly one defined,
and it is small and fixed-size). And any extension headers
would be breaking the protocol to go over LARGE_PACKET_MAX
anyway.
The other difference is that packet_get_line would return
on error rather than dying. However, both callers of
packet_get_line are actually improved by dying.
The first caller does its own error checking, but we can
drop that; as a result, we'll actually get more specific
reporting about protocol breakage when packet_read dies
internally. The only downside is that packet_read will not
print the smart-http URL that failed, but that's not a big
deal; anybody not debugging can already see the remote's URL,
and anybody debugging would want to run with
GIT_CURL_VERBOSE anyway to see way more information.
The second caller, which is just trying to skip past any
extra smart-http headers (of which there are none defined,
but which we allow to keep room for future expansion), did
not error check at all. As a result, it would treat an error
just like a flush packet. The resulting mess would generally
cause an error later in get_remote_heads, but now we get
error reporting much closer to the source of the problem.
Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
/*
|
|
|
|
* Same as packet_read_line, but read from a buf rather than a descriptor;
|
|
|
|
* see packet_read for details on how src_* is used.
|
|
|
|
*/
|
|
|
|
char *packet_read_line_buf(char **src_buf, size_t *src_len, int *size);
|
pkt-line: provide a generic reading function with options
Originally we had a single function for reading packetized
data: packet_read_line. Commit 46284dd grew a more "gentle"
form, packet_read, that returns an error instead of dying
upon reading a truncated input stream. However, it is not
clear from the names which should be called, or what the
difference is.
Let's instead make packet_read be a generic public interface
that can take option flags, and update the single callsite
that uses it. This is less code, more clear, and paves the
way for introducing more options into the generic interface
later. The function signature is changed, so there should be
no hidden conflicts with topics in flight.
While we're at it, we'll document how error conditions are
handled based on the options, and rename the confusing
"return_line_fail" option to "gentle_on_eof". While we are
cleaning up the names, we can drop the "return_line_fail"
checks in packet_read_internal entirely. They look like
this:
ret = safe_read(..., return_line_fail);
if (return_line_fail && ret < 0)
...
The check for return_line_fail is a no-op; safe_read will
only ever return an error value if return_line_fail was true
in the first place.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
|
|
|
|
/*
|
|
|
|
* Reads a stream of variable sized packets until a flush packet is detected.
|
|
|
|
*/
|
|
|
|
ssize_t read_packetized_to_strbuf(int fd_in, struct strbuf *sb_out);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Receive multiplexed output stream over git native protocol.
|
|
|
|
* in_stream is the input stream from the remote, which carries data
|
|
|
|
* in pkt_line format with band designator. Demultiplex it into out
|
|
|
|
* and err and return error appropriately. Band #1 carries the
|
|
|
|
* primary payload. Things coming over band #2 are not necessarily
|
|
|
|
* error; they are usually informative messages on the standard error
|
|
|
|
* stream (aka "verbose"). A message over band #3 is a signal that
|
|
|
|
* the remote died unexpectedly. A flush() concludes the stream.
|
|
|
|
*
|
|
|
|
* Returns SIDEBAND_FLUSH upon a normal conclusion, and SIDEBAND_PROTOCOL_ERROR
|
|
|
|
* or SIDEBAND_REMOTE_ERROR if an error occurred.
|
|
|
|
*/
|
|
|
|
int recv_sideband(const char *me, int in_stream, int out);
|
|
|
|
|
|
|
|
/*
 * Reader state for pkt-line input: an abstraction around the
 * 'packet_read_with_status()' function. Initialize with
 * packet_reader_init(); 'status', 'pktlen' and 'line' describe the
 * result of the most recent packet_reader_read()/packet_reader_peek().
 */
struct packet_reader {
|
|
|
|
/* source file descriptor */
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
/* source buffer and its size */
|
|
|
|
char *src_buffer;
|
|
|
|
size_t src_len;
|
|
|
|
|
|
|
|
/* buffer that pkt-lines are read into and its size */
|
|
|
|
char *buffer;
|
|
|
|
unsigned buffer_size;
|
|
|
|
|
|
|
|
/* options to be used during reads */
|
|
|
|
int options;
|
|
|
|
|
|
|
|
/* status of the last read */
|
|
|
|
enum packet_read_status status;
|
|
|
|
|
|
|
|
/*
 * length of data read during the last read
 * (-1 on PACKET_READ_ERROR, 0 on PACKET_READ_FLUSH)
 */
|
|
|
|
int pktlen;
|
|
|
|
|
|
|
|
/* the last line read (NULL on PACKET_READ_ERROR and PACKET_READ_FLUSH) */
|
|
|
|
const char *line;
|
|
|
|
|
|
|
|
/* indicates if a line has been peeked */
/* NOTE(review): presumably set by packet_reader_peek() and cleared when
 * packet_reader_read() consumes the peeked line -- confirm in pkt-line.c */
|
|
|
|
int line_peeked;
|
|
|
|
|
|
|
|
/* NOTE(review): presumably enables sideband demultiplexing of incoming
 * packets (cf. recv_sideband()) -- confirm in pkt-line.c */
unsigned use_sideband : 1;
|
|
|
|
/* NOTE(review): presumably the caller's name, as in the 'me' argument of
 * recv_sideband() -- confirm */
const char *me;
|
|
|
|
|
|
|
|
/* hash algorithm in use */
|
|
|
|
const struct git_hash_algo *hash_algo;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize a 'struct packet_reader' object which is an
|
|
|
|
* abstraction around the 'packet_read_with_status()' function.
|
|
|
|
*/
|
|
|
|
void packet_reader_init(struct packet_reader *reader, int fd,
|
|
|
|
char *src_buffer, size_t src_len,
|
|
|
|
int options);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Perform a packet read and return the status of the read.
|
|
|
|
* The values of 'pktlen' and 'line' are updated based on the status of the
|
|
|
|
* read as follows:
|
|
|
|
*
|
|
|
|
* PACKET_READ_ERROR: 'pktlen' is set to '-1' and 'line' is set to NULL
|
|
|
|
* PACKET_READ_NORMAL: 'pktlen' is set to the number of bytes read
|
|
|
|
* 'line' is set to point at the read line
|
|
|
|
* PACKET_READ_FLUSH: 'pktlen' is set to '0' and 'line' is set to NULL
|
|
|
|
*/
|
|
|
|
enum packet_read_status packet_reader_read(struct packet_reader *reader);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Peek the next packet line without consuming it and return the status.
|
|
|
|
* The next call to 'packet_reader_read()' will perform a read of the same line
|
|
|
|
* that was peeked, consuming the line.
|
|
|
|
*
|
|
|
|
* Peeking multiple times without calling 'packet_reader_read()' will return
|
|
|
|
* the same result.
|
|
|
|
*/
|
|
|
|
enum packet_read_status packet_reader_peek(struct packet_reader *reader);
|
|
|
|
|
|
|
|
#define DEFAULT_PACKET_MAX 1000
|
|
|
|
#define LARGE_PACKET_MAX 65520
|
|
|
|
#define LARGE_PACKET_DATA_MAX (LARGE_PACKET_MAX - 4)
|
pkt-line: provide a LARGE_PACKET_MAX static buffer
Most of the callers of packet_read_line just read into a
static 1000-byte buffer (callers which handle arbitrary
binary data already use LARGE_PACKET_MAX). This works fine
in practice, because:
1. The only variable-sized data in these lines is a ref
name, and refs tend to be a lot shorter than 1000
characters.
2. When sending ref lines, git-core always limits itself
to 1000 byte packets.
However, the only limit given in the protocol specification
in Documentation/technical/protocol-common.txt is
LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in
pack-protocol.txt, and then only describing what we write,
not as a specific limit for readers.
This patch lets us bump the 1000-byte limit to
LARGE_PACKET_MAX. Even though git-core will never write a
packet where this makes a difference, there are two good
reasons to do this:
1. Other git implementations may have followed
protocol-common.txt and used a larger maximum size. We
don't bump into it in practice because it would involve
very long ref names.
2. We may want to increase the 1000-byte limit one day.
Since packets are transferred before any capabilities,
it's difficult to do this in a backwards-compatible
way. But if we bump the size of the buffer the readers can
handle, eventually older versions of git will be
obsolete enough that we can justify bumping the
writers, as well. We don't have plans to do this
anytime soon, but there is no reason not to start the
clock ticking now.
Just bumping all of the reading bufs to LARGE_PACKET_MAX
would waste memory. Instead, since most readers just read
into a temporary buffer anyway, let's provide a single
static buffer that all callers can use. We can further wrap
this detail away by having the packet_read_line wrapper just
use the buffer transparently and return a pointer to the
static storage. That covers most of the cases, and the
remaining ones already read into their own LARGE_PACKET_MAX
buffers.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
12 years ago
|
|
|
extern char packet_buffer[LARGE_PACKET_MAX];
|
|
|
|
|
|
|
|
/*
 * Writer state for pkt-line output; initialize with packet_writer_init()
 * and pass to the packet_writer_*() functions.
 */
struct packet_writer {
|
|
|
|
/* destination file descriptor */
int dest_fd;
|
|
|
|
/* NOTE(review): presumably selects whether output is multiplexed over
 * the sideband -- confirm in pkt-line.c */
unsigned use_sideband : 1;
|
|
|
|
};
|
|
|
|
|
|
|
|
void packet_writer_init(struct packet_writer *writer, int dest_fd);
|
|
|
|
|
|
|
|
/* These functions die upon failure. */
|
|
|
|
__attribute__((format (printf, 2, 3)))
|
|
|
|
void packet_writer_write(struct packet_writer *writer, const char *fmt, ...);
|
|
|
|
__attribute__((format (printf, 2, 3)))
|
|
|
|
void packet_writer_error(struct packet_writer *writer, const char *fmt, ...);
|
|
|
|
void packet_writer_delim(struct packet_writer *writer);
|
|
|
|
void packet_writer_flush(struct packet_writer *writer);
|
|
|
|
|
|
|
|
#endif
|