You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
81 lines
2.6 KiB
81 lines
2.6 KiB
#!/bin/bash

# Example script to deltify an entire GIT repository based on the commit list.
# The most recent version of a file is the reference and previous versions
# are stored as deltas against the best earlier version available. And so on
# for successive versions going back in time. This way the increasing delta
# overhead is pushed towards older versions of any given file.
#
# The -d argument lets you put a limit on the delta chain depth.
# If 0 is passed then everything is undeltafied. Limiting the delta
# depth is meaningful for subsequent access performance to old revisions.
# A value of 16 might be a good compromise between performance and good
# space saving. Current default is unbounded.
#
# The --max-behind=30 argument is passed to git-mkdelta so as to keep
# combinations and memory usage bounded a bit. If you have lots of memory
# and CPU power you may remove it (or set it to 0) to let git-mkdelta find the
# best delta match regardless of the number of revisions for a given file.
# You can also make the value smaller to make it faster and less
# memory hungry. A value of 5 ought to still give pretty good results.
# When set to 0 or omitted, the look-behind is unbounded. Note that
# git-mkdelta might die with a segmentation fault in that case if it
# runs out of memory. Note that the GIT repository will still be consistent
# even if git-mkdelta dies unexpectedly.

# Stop at the first command that fails outside a guarded context.
set -e

# Optional "-d <depth>": forwarded to git-mkdelta as --max-depth=<depth>.
# Without the option max_depth stays empty and no depth limit is passed on.
max_depth=
if [[ "${1:-}" == "-d" ]]; then
  # A bare "-d" would make "shift 2" fail, and set -e would then end the
  # script without any message; say what is wrong instead.
  if (( $# < 2 )); then
    printf 'usage: %s [-d <max-depth>]\n' "${0##*/}" >&2
    exit 1
  fi
  max_depth="--max-depth=$2"
  shift 2
fi

# Look-behind window handed to git-mkdelta.  The revision batching later in
# this script overlaps consecutive batches by the same number of revisions.
overlap=30
max_behind="--max-behind=$overlap"

#######################################
# Run git-mkdelta over the objects collected for one file.
# Globals:
#   list       (read) whitespace-separated object ids; nothing is done
#              when it is empty or unset
#   curr_file  (read) label shown in the progress message
#   max_depth  (read) optional --max-depth=N option, may be empty
#   max_behind (read) optional --max-behind=N option, may be empty
# Outputs:
#   "Processing <curr_file>" followed by whatever git-mkdelta prints.
# Returns:
#   0 when list is empty, otherwise the exit status of xargs.
#######################################
process_list() {
  [[ -n "${list:-}" ]] || return 0
  printf 'Processing %s\n' "${curr_file:-}"
  # xargs splits the ids on whitespace and appends them after the options.
  # The ${var:+...} form drops an option completely when its variable is
  # empty instead of passing an empty argument.
  printf '%s\n' "$list" |
    xargs git-mkdelta ${max_depth:+"$max_depth"} ${max_behind:+"$max_behind"} -v
}

# Turn git-diff-tree raw output (stdin) into "<sha1> <path>" lines (stdout).
# Every ":"-prefixed record whose fifth field is "M" yields two lines: field 4
# first, then field 3 (in git's raw format: post-image id, then pre-image id).
# The path is everything after the fifth field, so names containing blanks
# are kept whole instead of being cut at the first blank.
_modified_objects() {
  awk '
    /^:/ && $5 == "M" {
      path = $0
      for (k = 0; k < 5; k++) sub(/^[^ \t]+[ \t]+/, "", path)
      printf "%s %s\n%s %s\n", $4, path, $3, path
    }'
}

# Read "<sha1> <path>" lines, already sorted by path, from stdin.  The ids of
# each path are gathered in $list (with $curr_file naming the path) and every
# group is handed to process_list, the last one included.
_deltafy_sorted_objects() {
  local sha1 file
  curr_file=""
  list=""
  while read -r sha1 file; do
    if [[ "$file" == "$curr_file" ]]; then
      list+=" $sha1"
    else
      process_list
      curr_file="$file"
      list="$sha1"
    fi
  done
  # Flush the final group: no path change follows it, so the call inside
  # the loop never fires for it.
  process_list
}

# Shrink $rev_list to its last $overlap revisions, keeping it
# newline-terminated so the next revision appended starts on its own line.
_keep_overlap() {
  rev_list="$(printf '%s' "$rev_list" | tail -n "$overlap")"
  # Command substitution strips the trailing newline; put it back.
  [[ -z "$rev_list" ]] || rev_list+=$'\n'
}

rev_list=""
curr_file=""
list=""

git-rev-list HEAD |
  while true; do
    # Let's batch revisions into groups of 1000 to give it a chance to
    # scale with repositories containing long revision lists.  We also
    # overlap with the previous batch the size of mkdelta's look behind
    # value in order to account for the processing discontinuity.
    fresh=0
    for ((i = 0; i < 1000; i++)); do
      read -r rev || break
      rev_list+="$rev"$'\n'
      fresh=$((fresh + 1))
    done
    # Without any new revision only the overlap would be processed again.
    (( fresh > 0 )) || break

    printf '%s' "$rev_list" |
      git-diff-tree -r -t --stdin |
      _modified_objects |
      LC_ALL=C sort -s -k 2 | uniq |
      _deltafy_sorted_objects

    # read leaves $rev empty once the revision list is exhausted.
    [[ -n "$rev" ]] || break
    _keep_overlap
  done
# No process_list call is needed here: both loops above run in pipeline
# subshells, so $list is still empty in this shell.  Each batch flushes its
# own last group inside _deltafy_sorted_objects.

# Print the tree id recorded in every commit listed by "git-rev-list HEAD",
# one id per line.
_root_tree_ids() {
  local commit
  git-rev-list HEAD |
    while read -r commit; do
      # Only the first line of the commit object is examined: when it starts
      # with "tree " the remainder (the tree id) is printed, then sed quits
      # without reading the rest of the commit.
      git-cat-file commit "$commit" | sed -n '1s/^tree //p;q'
    done
}

# Deltify the top-level tree objects themselves, as one group labelled
# "root directory".
curr_file="root directory"
list="$(_root_tree_ids)"
process_list