467 lines
15 KiB
Rust
467 lines
15 KiB
Rust
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation: version 2 of the License, dated June 1991.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License along
|
|
// with this program; if not, see <https://www.gnu.org/licenses/>.
|
|
|
|
use std::error::Error;
|
|
use std::fmt::{self, Debug, Display};
|
|
use std::io::{self, Write};
|
|
use std::os::raw::c_void;
|
|
|
|
pub const GIT_MAX_RAWSZ: usize = 32;
|
|
|
|
/// An error indicating an invalid hash algorithm.
|
|
///
|
|
/// The contained `u32` is the same as the `algo` field in `ObjectID`.
|
|
#[derive(Debug, Copy, Clone)]
|
|
pub struct InvalidHashAlgorithm(pub u32);
|
|
|
|
impl Display for InvalidHashAlgorithm {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "invalid hash algorithm {}", self.0)
|
|
}
|
|
}
|
|
|
|
impl Error for InvalidHashAlgorithm {}
|
|
|
|
/// A binary object ID.
|
|
#[repr(C)]
|
|
#[derive(Clone, Ord, PartialOrd, Eq, PartialEq)]
|
|
pub struct ObjectID {
|
|
pub hash: [u8; GIT_MAX_RAWSZ],
|
|
pub algo: u32,
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
impl ObjectID {
|
|
/// Return a new object ID with the given algorithm and hash.
|
|
///
|
|
/// `hash` must be exactly the proper length for `algo` and this function panics if it is not.
|
|
/// The extra internal storage of `hash`, if any, is zero filled.
|
|
pub fn new(algo: HashAlgorithm, hash: &[u8]) -> Self {
|
|
let mut data = [0u8; GIT_MAX_RAWSZ];
|
|
// This verifies that the length of `hash` is correct.
|
|
data[0..algo.raw_len()].copy_from_slice(hash);
|
|
Self {
|
|
hash: data,
|
|
algo: algo as u32,
|
|
}
|
|
}
|
|
|
|
/// Return the algorithm for this object ID.
|
|
///
|
|
/// If the algorithm set internally is not valid, this function panics.
|
|
pub fn algo(&self) -> Result<HashAlgorithm, InvalidHashAlgorithm> {
|
|
HashAlgorithm::from_u32(self.algo).ok_or(InvalidHashAlgorithm(self.algo))
|
|
}
|
|
|
|
pub fn as_slice(&self) -> Result<&[u8], InvalidHashAlgorithm> {
|
|
match HashAlgorithm::from_u32(self.algo) {
|
|
Some(algo) => Ok(&self.hash[0..algo.raw_len()]),
|
|
None => Err(InvalidHashAlgorithm(self.algo)),
|
|
}
|
|
}
|
|
|
|
pub fn as_mut_slice(&mut self) -> Result<&mut [u8], InvalidHashAlgorithm> {
|
|
match HashAlgorithm::from_u32(self.algo) {
|
|
Some(algo) => Ok(&mut self.hash[0..algo.raw_len()]),
|
|
None => Err(InvalidHashAlgorithm(self.algo)),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Display for ObjectID {
|
|
/// Format this object ID as a hex object ID.
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
let hash = self.as_slice().unwrap();
|
|
for x in hash {
|
|
write!(f, "{:02x}", x)?;
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl Debug for ObjectID {
|
|
/// Format this object ID as a hex object ID with a colon and name appended to it.
|
|
///
|
|
/// ```
|
|
/// assert_eq!(
|
|
/// format!("{:?}", HashAlgorithm::SHA256.null_oid()),
|
|
/// "0000000000000000000000000000000000000000000000000000000000000000:sha256"
|
|
/// );
|
|
/// ```
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
let hash = match self.as_slice() {
|
|
Ok(hash) => hash,
|
|
Err(_) => &self.hash,
|
|
};
|
|
for x in hash {
|
|
write!(f, "{:02x}", x)?;
|
|
}
|
|
match self.algo() {
|
|
Ok(algo) => write!(f, ":{}", algo.name()),
|
|
Err(e) => write!(f, ":invalid-hash-algo-{}", e.0),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A trait to implement hashing with a cryptographic algorithm.
|
|
pub trait CryptoDigest {
|
|
/// Return true if this digest is safe for use with untrusted data, false otherwise.
|
|
fn is_safe(&self) -> bool;
|
|
|
|
/// Update the digest with the specified data.
|
|
fn update(&mut self, data: &[u8]);
|
|
|
|
/// Return an object ID, consuming the hasher.
|
|
fn into_oid(self) -> ObjectID;
|
|
|
|
/// Return a hash as a `Vec`, consuming the hasher.
|
|
fn into_vec(self) -> Vec<u8>;
|
|
}
|
|
|
|
/// A structure to hash data with a cryptographic hash algorithm.
|
|
///
|
|
/// Instances of this class are safe for use with untrusted data, provided Git has been compiled
|
|
/// with a collision-detecting implementation of SHA-1.
|
|
pub struct CryptoHasher {
|
|
algo: HashAlgorithm,
|
|
ctx: *mut c_void,
|
|
}
|
|
|
|
impl CryptoHasher {
|
|
/// Create a new hasher with the algorithm specified with `algo`.
|
|
///
|
|
/// This hasher is safe to use on untrusted data. If SHA-1 is selected and Git was compiled
|
|
/// with a collision-detecting implementation of SHA-1, then this function will use that
|
|
/// implementation and detect any attempts at a collision.
|
|
pub fn new(algo: HashAlgorithm) -> Self {
|
|
let ctx = unsafe { c::git_hash_alloc() };
|
|
unsafe { c::git_hash_init(ctx, algo.hash_algo_ptr()) };
|
|
Self { algo, ctx }
|
|
}
|
|
}
|
|
|
|
impl CryptoDigest for CryptoHasher {
|
|
/// Return true if this digest is safe for use with untrusted data, false otherwise.
|
|
fn is_safe(&self) -> bool {
|
|
true
|
|
}
|
|
|
|
/// Update the hasher with the specified data.
|
|
fn update(&mut self, data: &[u8]) {
|
|
unsafe { c::git_hash_update(self.ctx, data.as_ptr() as *const c_void, data.len()) };
|
|
}
|
|
|
|
/// Return an object ID, consuming the hasher.
|
|
fn into_oid(self) -> ObjectID {
|
|
let mut oid = ObjectID {
|
|
hash: [0u8; 32],
|
|
algo: self.algo as u32,
|
|
};
|
|
unsafe { c::git_hash_final_oid(&mut oid as *mut ObjectID as *mut c_void, self.ctx) };
|
|
oid
|
|
}
|
|
|
|
/// Return a hash as a `Vec`, consuming the hasher.
|
|
fn into_vec(self) -> Vec<u8> {
|
|
let mut v = vec![0u8; self.algo.raw_len()];
|
|
unsafe { c::git_hash_final(v.as_mut_ptr(), self.ctx) };
|
|
v
|
|
}
|
|
}
|
|
|
|
impl Clone for CryptoHasher {
|
|
fn clone(&self) -> Self {
|
|
let ctx = unsafe { c::git_hash_alloc() };
|
|
unsafe { c::git_hash_clone(ctx, self.ctx) };
|
|
Self {
|
|
algo: self.algo,
|
|
ctx,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Drop for CryptoHasher {
|
|
fn drop(&mut self) {
|
|
unsafe { c::git_hash_free(self.ctx) };
|
|
}
|
|
}
|
|
|
|
impl Write for CryptoHasher {
|
|
fn write(&mut self, data: &[u8]) -> io::Result<usize> {
|
|
self.update(data);
|
|
Ok(data.len())
|
|
}
|
|
|
|
fn flush(&mut self) -> io::Result<()> {
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
/// A hash algorithm,
|
|
#[repr(C)]
|
|
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
|
|
pub enum HashAlgorithm {
|
|
SHA1 = 1,
|
|
SHA256 = 2,
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
impl HashAlgorithm {
|
|
const SHA1_NULL_OID: ObjectID = ObjectID {
|
|
hash: [0u8; 32],
|
|
algo: Self::SHA1 as u32,
|
|
};
|
|
const SHA256_NULL_OID: ObjectID = ObjectID {
|
|
hash: [0u8; 32],
|
|
algo: Self::SHA256 as u32,
|
|
};
|
|
|
|
const SHA1_EMPTY_TREE: ObjectID = ObjectID {
|
|
hash: *b"\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
|
algo: Self::SHA1 as u32,
|
|
};
|
|
const SHA256_EMPTY_TREE: ObjectID = ObjectID {
|
|
hash: *b"\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc\x53\x21",
|
|
algo: Self::SHA256 as u32,
|
|
};
|
|
|
|
const SHA1_EMPTY_BLOB: ObjectID = ObjectID {
|
|
hash: *b"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
|
algo: Self::SHA1 as u32,
|
|
};
|
|
const SHA256_EMPTY_BLOB: ObjectID = ObjectID {
|
|
hash: *b"\x47\x3a\x0f\x4c\x3b\xe8\xa9\x36\x81\xa2\x67\xe3\xb1\xe9\xa7\xdc\xda\x11\x85\x43\x6f\xe1\x41\xf7\x74\x91\x20\xa3\x03\x72\x18\x13",
|
|
algo: Self::SHA256 as u32,
|
|
};
|
|
|
|
/// Return a hash algorithm based on the internal integer ID used by Git.
|
|
///
|
|
/// Returns `None` if the algorithm doesn't indicate a valid algorithm.
|
|
pub const fn from_u32(algo: u32) -> Option<HashAlgorithm> {
|
|
match algo {
|
|
1 => Some(HashAlgorithm::SHA1),
|
|
2 => Some(HashAlgorithm::SHA256),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Return a hash algorithm based on the internal integer ID used by Git.
|
|
///
|
|
/// Returns `None` if the algorithm doesn't indicate a valid algorithm.
|
|
pub const fn from_format_id(algo: u32) -> Option<HashAlgorithm> {
|
|
match algo {
|
|
0x73686131 => Some(HashAlgorithm::SHA1),
|
|
0x73323536 => Some(HashAlgorithm::SHA256),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// The name of this hash algorithm as a string suitable for the configuration file.
|
|
pub const fn name(self) -> &'static str {
|
|
match self {
|
|
HashAlgorithm::SHA1 => "sha1",
|
|
HashAlgorithm::SHA256 => "sha256",
|
|
}
|
|
}
|
|
|
|
/// The format ID of this algorithm for binary formats.
|
|
///
|
|
/// Note that when writing this to a data format, it should be written in big-endian format
|
|
/// explicitly.
|
|
pub const fn format_id(self) -> u32 {
|
|
match self {
|
|
HashAlgorithm::SHA1 => 0x73686131,
|
|
HashAlgorithm::SHA256 => 0x73323536,
|
|
}
|
|
}
|
|
|
|
/// The length of binary object IDs in this algorithm in bytes.
|
|
pub const fn raw_len(self) -> usize {
|
|
match self {
|
|
HashAlgorithm::SHA1 => 20,
|
|
HashAlgorithm::SHA256 => 32,
|
|
}
|
|
}
|
|
|
|
/// The length of object IDs in this algorithm in hexadecimal characters.
|
|
pub const fn hex_len(self) -> usize {
|
|
self.raw_len() * 2
|
|
}
|
|
|
|
/// The number of bytes which is processed by one iteration of this algorithm's compression
|
|
/// function.
|
|
pub const fn block_size(self) -> usize {
|
|
match self {
|
|
HashAlgorithm::SHA1 => 64,
|
|
HashAlgorithm::SHA256 => 64,
|
|
}
|
|
}
|
|
|
|
/// The object ID representing the empty blob.
|
|
pub const fn empty_blob(self) -> &'static ObjectID {
|
|
match self {
|
|
HashAlgorithm::SHA1 => &Self::SHA1_EMPTY_BLOB,
|
|
HashAlgorithm::SHA256 => &Self::SHA256_EMPTY_BLOB,
|
|
}
|
|
}
|
|
|
|
/// The object ID representing the empty tree.
|
|
pub const fn empty_tree(self) -> &'static ObjectID {
|
|
match self {
|
|
HashAlgorithm::SHA1 => &Self::SHA1_EMPTY_TREE,
|
|
HashAlgorithm::SHA256 => &Self::SHA256_EMPTY_TREE,
|
|
}
|
|
}
|
|
|
|
/// The object ID which is all zeros.
|
|
pub const fn null_oid(self) -> &'static ObjectID {
|
|
match self {
|
|
HashAlgorithm::SHA1 => &Self::SHA1_NULL_OID,
|
|
HashAlgorithm::SHA256 => &Self::SHA256_NULL_OID,
|
|
}
|
|
}
|
|
|
|
/// A pointer to the C `struct git_hash_algo` for interoperability with C.
|
|
pub fn hash_algo_ptr(self) -> *const c_void {
|
|
unsafe { c::hash_algo_ptr_by_number(self as u32) }
|
|
}
|
|
|
|
/// Create a hasher for this algorithm.
|
|
pub fn hasher(self) -> CryptoHasher {
|
|
CryptoHasher::new(self)
|
|
}
|
|
}
|
|
|
|
pub mod c {
|
|
use std::os::raw::c_void;
|
|
|
|
extern "C" {
|
|
pub fn hash_algo_ptr_by_number(n: u32) -> *const c_void;
|
|
pub fn unsafe_hash_algo(algop: *const c_void) -> *const c_void;
|
|
pub fn git_hash_alloc() -> *mut c_void;
|
|
pub fn git_hash_free(ctx: *mut c_void);
|
|
pub fn git_hash_init(dst: *mut c_void, algop: *const c_void);
|
|
pub fn git_hash_clone(dst: *mut c_void, src: *const c_void);
|
|
pub fn git_hash_update(ctx: *mut c_void, inp: *const c_void, len: usize);
|
|
pub fn git_hash_final(hash: *mut u8, ctx: *mut c_void);
|
|
pub fn git_hash_final_oid(hash: *mut c_void, ctx: *mut c_void);
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::{CryptoDigest, HashAlgorithm, ObjectID};
|
|
use std::io::Write;
|
|
|
|
fn all_algos() -> &'static [HashAlgorithm] {
|
|
&[HashAlgorithm::SHA1, HashAlgorithm::SHA256]
|
|
}
|
|
|
|
#[test]
|
|
fn format_id_round_trips() {
|
|
for algo in all_algos() {
|
|
assert_eq!(
|
|
*algo,
|
|
HashAlgorithm::from_format_id(algo.format_id()).unwrap()
|
|
);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn offset_round_trips() {
|
|
for algo in all_algos() {
|
|
assert_eq!(*algo, HashAlgorithm::from_u32(*algo as u32).unwrap());
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn slices_have_correct_length() {
|
|
for algo in all_algos() {
|
|
for oid in [algo.null_oid(), algo.empty_blob(), algo.empty_tree()] {
|
|
assert_eq!(oid.as_slice().unwrap().len(), algo.raw_len());
|
|
}
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn object_ids_format_correctly() {
|
|
let entries = &[
|
|
(
|
|
HashAlgorithm::SHA1.null_oid(),
|
|
"0000000000000000000000000000000000000000",
|
|
"0000000000000000000000000000000000000000:sha1",
|
|
),
|
|
(
|
|
HashAlgorithm::SHA1.empty_blob(),
|
|
"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391",
|
|
"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391:sha1",
|
|
),
|
|
(
|
|
HashAlgorithm::SHA1.empty_tree(),
|
|
"4b825dc642cb6eb9a060e54bf8d69288fbee4904",
|
|
"4b825dc642cb6eb9a060e54bf8d69288fbee4904:sha1",
|
|
),
|
|
(
|
|
HashAlgorithm::SHA256.null_oid(),
|
|
"0000000000000000000000000000000000000000000000000000000000000000",
|
|
"0000000000000000000000000000000000000000000000000000000000000000:sha256",
|
|
),
|
|
(
|
|
HashAlgorithm::SHA256.empty_blob(),
|
|
"473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813",
|
|
"473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813:sha256",
|
|
),
|
|
(
|
|
HashAlgorithm::SHA256.empty_tree(),
|
|
"6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321",
|
|
"6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321:sha256",
|
|
),
|
|
];
|
|
for (oid, display, debug) in entries {
|
|
assert_eq!(format!("{}", oid), *display);
|
|
assert_eq!(format!("{:?}", oid), *debug);
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn hasher_works_correctly() {
|
|
for algo in all_algos() {
|
|
let tests: &[(&[u8], &ObjectID)] = &[
|
|
(b"blob 0\0", algo.empty_blob()),
|
|
(b"tree 0\0", algo.empty_tree()),
|
|
];
|
|
for (data, oid) in tests {
|
|
let mut h = algo.hasher();
|
|
assert!(h.is_safe());
|
|
// Test that this works incrementally.
|
|
h.update(&data[0..2]);
|
|
h.update(&data[2..]);
|
|
|
|
let h2 = h.clone();
|
|
|
|
let actual_oid = h.into_oid();
|
|
assert_eq!(**oid, actual_oid);
|
|
|
|
let v = h2.into_vec();
|
|
assert_eq!((*oid).as_slice().unwrap(), &v);
|
|
|
|
let mut h = algo.hasher();
|
|
h.write_all(&data[0..2]).unwrap();
|
|
h.write_all(&data[2..]).unwrap();
|
|
|
|
let actual_oid = h.into_oid();
|
|
assert_eq!(**oid, actual_oid);
|
|
}
|
|
}
|
|
}
|
|
}
|