From c74320872b445104fe5c265e60785d9d26d94cc5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 15:37:18 -0700 Subject: [PATCH 1/3] built-in count-objects. Also it learned to do -v (verbose) to report: - number of loose objects - disk occupied by loose objects - number of objects in local packs - number of loose objects that are also in pack - unrecognised garbage in .git/objects/??/. Signed-off-by: Junio C Hamano --- Makefile | 2 +- builtin-count.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++ builtin.h | 1 + git.c | 1 + 4 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 builtin-count.c diff --git a/Makefile b/Makefile index 8ce27a65fb..14193aa1e2 100644 --- a/Makefile +++ b/Makefile @@ -214,7 +214,7 @@ LIB_OBJS = \ $(DIFF_OBJS) BUILTIN_OBJS = \ - builtin-log.o builtin-help.o + builtin-log.o builtin-help.o builtin-count.o GITLIBS = $(LIB_FILE) $(XDIFF_LIB) LIBS = $(GITLIBS) -lz diff --git a/builtin-count.c b/builtin-count.c new file mode 100644 index 0000000000..0256369d5b --- /dev/null +++ b/builtin-count.c @@ -0,0 +1,123 @@ +/* + * Builtin "git count-objects". 
+ * + * Copyright (c) 2006 Junio C Hamano + */ + +#include "cache.h" +#include "builtin.h" + +static const char count_objects_usage[] = "git-count-objects [-v]"; + +static void count_objects(DIR *d, char *path, int len, int verbose, + unsigned long *loose, + unsigned long *loose_size, + unsigned long *packed_loose, + unsigned long *garbage) +{ + struct dirent *ent; + while ((ent = readdir(d)) != NULL) { + char hex[41]; + unsigned char sha1[20]; + const char *cp; + int bad = 0; + + if ((ent->d_name[0] == '.') && + (ent->d_name[1] == 0 || + ((ent->d_name[1] == '.') && (ent->d_name[2] == 0)))) + continue; + for (cp = ent->d_name; *cp; cp++) { + int ch = *cp; + if (('0' <= ch && ch <= '9') || + ('a' <= ch && ch <= 'f')) + continue; + bad = 1; + break; + } + if (cp - ent->d_name != 38) + bad = 1; + else { + struct stat st; + memcpy(path + len + 3, ent->d_name, 38); + path[len + 2] = '/'; + path[len + 41] = 0; + if (lstat(path, &st) || !S_ISREG(st.st_mode)) + bad = 1; + else + (*loose_size) += st.st_blocks; + } + if (bad) { + if (verbose) { + error("garbage found: %.*s/%s", + len + 2, path, ent->d_name); + (*garbage)++; + } + continue; + } + (*loose)++; + if (!verbose) + continue; + memcpy(hex, path+len, 2); + memcpy(hex+2, ent->d_name, 38); + hex[40] = 0; + if (get_sha1_hex(hex, sha1)) + die("internal error"); + if (has_sha1_pack(sha1)) + (*packed_loose)++; + } +} + +int cmd_count_objects(int ac, const char **av, char **ep) +{ + int i; + int verbose = 0; + const char *objdir = get_object_directory(); + int len = strlen(objdir); + char *path = xmalloc(len + 50); + unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0; + unsigned long loose_size = 0; + + for (i = 1; i < ac; i++) { + const char *arg = av[i]; + if (*arg != '-') + break; + else if (!strcmp(arg, "-v")) + verbose = 1; + else + usage(count_objects_usage); + } + + /* we do not take arguments other than flags for now */ + if (i < ac) + usage(count_objects_usage); + memcpy(path, objdir, len); + if 
(len && objdir[len-1] != '/') + path[len++] = '/'; + for (i = 0; i < 256; i++) { + DIR *d; + sprintf(path + len, "%02x", i); + d = opendir(path); + if (!d) + continue; + count_objects(d, path, len, verbose, + &loose, &loose_size, &packed_loose, &garbage); + closedir(d); + } + if (verbose) { + struct packed_git *p; + for (p = packed_git; p; p = p->next) { + if (!p->pack_local) + continue; + packed += num_packed_objects(p); + } + printf("count: %lu\n", loose); + printf("size: %lu\n", loose_size / 2); + printf("in-pack: %lu\n", packed); + printf("prune-packable: %lu\n", packed_loose); + printf("garbage: %lu\n", garbage); + } + else + printf("%lu objects, %lu kilobytes\n", + loose, loose_size / 2); + return 0; +} diff --git a/builtin.h b/builtin.h index 47408a0585..76169e3f05 100644 --- a/builtin.h +++ b/builtin.h @@ -19,5 +19,6 @@ extern int cmd_version(int argc, const char **argv, char **envp); extern int cmd_whatchanged(int argc, const char **argv, char **envp); extern int cmd_show(int argc, const char **argv, char **envp); extern int cmd_log(int argc, const char **argv, char **envp); +extern int cmd_count_objects(int argc, const char **argv, char **envp); #endif diff --git a/git.c b/git.c index 01b7e28b8c..00fb399725 100644 --- a/git.c +++ b/git.c @@ -46,6 +46,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp) { "log", cmd_log }, { "whatchanged", cmd_whatchanged }, { "show", cmd_show }, + { "count-objects", cmd_count_objects }, }; int i; From 468eb79ed453017151c1230057986a7a79264395 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 1 May 2006 23:05:39 -0700 Subject: [PATCH 2/3] builtin-count-objects: make it official. Remove the shell-script version, make the hardlink from the git binary, and update the documentation to describe a new option. 
Signed-off-by: Junio C Hamano --- Documentation/git-count-objects.txt | 12 ++++++++++- Makefile | 5 +++-- git-count-objects.sh | 31 ----------------------------- 3 files changed, 14 insertions(+), 34 deletions(-) delete mode 100755 git-count-objects.sh diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt index 47216f488b..198ce77a8a 100644 --- a/Documentation/git-count-objects.txt +++ b/Documentation/git-count-objects.txt @@ -7,13 +7,23 @@ git-count-objects - Reports on unpacked objects SYNOPSIS -------- -'git-count-objects' +'git-count-objects' [-v] DESCRIPTION ----------- This counts the number of unpacked object files and disk space consumed by them, to help you decide when it is a good time to repack. + +OPTIONS +------- +-v:: + In addition to the number of loose objects and disk + space consumed, it reports the number of in-pack + objects, and number of objects that can be removed by + running `git-prune-packed`. + + Author ------ Written by Junio C Hamano diff --git a/Makefile b/Makefile index 14193aa1e2..f1592fb6ef 100644 --- a/Makefile +++ b/Makefile @@ -115,7 +115,7 @@ SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ SCRIPT_SH = \ git-add.sh git-bisect.sh git-branch.sh git-checkout.sh \ git-cherry.sh git-clean.sh git-clone.sh git-commit.sh \ - git-count-objects.sh git-diff.sh git-fetch.sh \ + git-diff.sh git-fetch.sh \ git-format-patch.sh git-ls-remote.sh \ git-merge-one-file.sh git-parse-remote.sh \ git-prune.sh git-pull.sh git-push.sh git-rebase.sh \ @@ -167,7 +167,8 @@ PROGRAMS = \ git-name-rev$X git-pack-redundant$X git-repo-config$X git-var$X \ git-describe$X git-merge-tree$X git-blame$X git-imap-send$X -BUILT_INS = git-log$X +BUILT_INS = git-log$X \ + git-count-objects$X # what 'all' will build and 'install' will install, in gitexecdir ALL_PROGRAMS = $(PROGRAMS) $(SIMPLE_PROGRAMS) $(SCRIPTS) diff --git a/git-count-objects.sh b/git-count-objects.sh deleted file mode 100755 index 40c58efe08..0000000000 --- 
a/git-count-objects.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2005 Junio C Hamano -# - -GIT_DIR=`git-rev-parse --git-dir` || exit $? - -dc </dev/null 2>/dev/null || { - # This is not a real DC at all -- it just knows how - # this script feeds DC and does the computation itself. - dc () { - while read a b - do - case $a,$b in - 0,) acc=0 ;; - *,+) acc=$(($acc + $a)) ;; - p,) echo "$acc" ;; - esac - done - } -} - -echo $(find "$GIT_DIR/objects"/?? -type f -print 2>/dev/null | wc -l) objects, \ -$({ - echo 0 - # "no-such" is to help Darwin folks by not using xargs -r. - find "$GIT_DIR/objects"/?? -type f -print 2>/dev/null | - xargs du -k "$GIT_DIR/objects/no-such" 2>/dev/null | - sed -e 's/[ ].*/ +/' - echo p -} | dc) kilobytes From 80fe7d2b54b1aebde6c067694cdbf3ba40ad0c39 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 2 May 2006 23:03:15 -0700 Subject: [PATCH 3/3] builtin-count-objects: open packs when running -v Otherwise we would report absolutely no objects in a fully packed repository. Signed-off-by: Junio C Hamano --- builtin-count.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin-count.c b/builtin-count.c index 0256369d5b..5ee72df247 100644 --- a/builtin-count.c +++ b/builtin-count.c @@ -105,6 +105,8 @@ int cmd_count_objects(int ac, const char **av, char **ep) } if (verbose) { struct packed_git *p; + if (!packed_git) + prepare_packed_git(); for (p = packed_git; p; p = p->next) { if (!p->pack_local) continue;