From 443596850f464c768624e6b6477acadc2aa35e8f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 May 2012 18:17:02 -0400 Subject: [PATCH 1/6] fetch-pack: sort incoming heads There's no reason to preserve the incoming order of the heads we're requested to fetch. By having them sorted, we can replace some of the quadratic algorithms with linear ones. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 10db15b184..d9e0ee51b0 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -1057,6 +1057,11 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) return ret; } +static int compare_heads(const void *a, const void *b) +{ + return strcmp(*(const char **)a, *(const char **)b); +} + struct ref *fetch_pack(struct fetch_pack_args *my_args, int fd[], struct child_process *conn, const struct ref *ref, @@ -1076,8 +1081,11 @@ struct ref *fetch_pack(struct fetch_pack_args *my_args, st.st_mtime = 0; } - if (heads && nr_heads) + if (heads && nr_heads) { nr_heads = remove_duplicates(nr_heads, heads); + qsort(heads, nr_heads, sizeof(*heads), compare_heads); + } + if (!ref) { packet_flush(fd[1]); die("no matching remote head"); From 7db8d5370f070ab983e1bc3569380b8aca899c6d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 May 2012 18:17:20 -0400 Subject: [PATCH 2/6] fetch-pack: avoid quadratic behavior in remove_duplicates We remove duplicate entries from the list of refs we are fed in fetch-pack. The original algorithm is quadratic over the number of refs, but since the list is now guaranteed to be sorted, we can do it in linear time. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index d9e0ee51b0..e101842627 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -834,21 +834,12 @@ static int remove_duplicates(int nr_heads, char **heads) { int src, dst; - for (src = dst = 0; src < nr_heads; src++) { - /* If heads[src] is different from any of - * heads[0..dst], push it in. - */ - int i; - for (i = 0; i < dst; i++) { - if (!strcmp(heads[i], heads[src])) - break; - } - if (i < dst) - continue; - if (src != dst) - heads[dst] = heads[src]; - dst++; - } + if (!nr_heads) + return 0; + + for (src = dst = 1; src < nr_heads; src++) + if (strcmp(heads[src], heads[dst-1])) + heads[dst++] = heads[src]; return dst; } From ed81c76bc3a9440e37b3512c9c2b742c6ca92c6f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 May 2012 18:19:28 -0400 Subject: [PATCH 3/6] add sorting infrastructure for list refs Since we store lists of refs as linked lists, we can use llist_mergesort to efficiently sort them. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote.c | 22 ++++++++++++++++++++++ remote.h | 2 ++ 2 files changed, 24 insertions(+) diff --git a/remote.c b/remote.c index b296d17404..6833538829 100644 --- a/remote.c +++ b/remote.c @@ -7,6 +7,7 @@ #include "dir.h" #include "tag.h" #include "string-list.h" +#include "mergesort.h" enum map_direction { FROM_SRC, FROM_DST }; @@ -918,6 +919,27 @@ void free_refs(struct ref *ref) } } +int ref_compare_name(const void *va, const void *vb) +{ + const struct ref *a = va, *b = vb; + return strcmp(a->name, b->name); +} + +static void *ref_list_get_next(const void *a) +{ + return ((const struct ref *)a)->next; +} + +static void ref_list_set_next(void *a, void *next) +{ + ((struct ref *)a)->next = next; +} + +void sort_ref_list(struct ref **l, int (*cmp)(const void *, const void *)) +{ + *l = llist_mergesort(*l, ref_list_get_next, ref_list_set_next, cmp); +} + static int count_refspec_match(const char *pattern, struct ref *refs, struct ref **matched_ref) diff --git a/remote.h b/remote.h index 9ad8eb6cc6..251d8fd965 100644 --- a/remote.h +++ b/remote.h @@ -72,6 +72,8 @@ extern const struct refspec *tag_refspec; struct ref *alloc_ref(const char *name); struct ref *copy_ref(const struct ref *ref); struct ref *copy_ref_list(const struct ref *ref); +void sort_ref_list(struct ref **, int (*cmp)(const void *, const void *)); +int ref_compare_name(const void *, const void *); int check_ref_type(const struct ref *ref, int flags); From 9e8e704f0bc14da325aa63f639c6dc782c81e26f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 May 2012 18:19:51 -0400 Subject: [PATCH 4/6] fetch-pack: sort the list of incoming refs Having the list sorted means we can avoid some quadratic algorithms when comparing lists. These should typically be sorted already, but they do come from the remote, so let's be extra careful. Our ref-sorting implementation does a mergesort, so we do not have to care about performance degrading in the common case that the list is already sorted. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index e101842627..7d708fb8ad 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -777,6 +777,8 @@ static struct ref *do_fetch_pack(int fd[2], struct ref *ref = copy_ref_list(orig_ref); unsigned char sha1[20]; + sort_ref_list(&ref, ref_compare_name); + if (is_repository_shallow() && !server_supports("shallow")) die("Server does not support shallow clients"); if (server_supports("multi_ack_detailed")) { From a0de28805dceaca86a6f7bedfd3e8c227b781d9d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 21 May 2012 18:23:29 -0400 Subject: [PATCH 5/6] fetch-pack: avoid quadratic loop in filter_refs We have a list of refs that we want to compare against the "match" array. The current code searches the match list linearly, giving quadratic behavior over the number of refs when you want to fetch all of them. Instead, we can compare the lists as we go, giving us linear behavior. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 7d708fb8ad..8a724739ba 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -528,6 +528,7 @@ static void filter_refs(struct ref **refs, int nr_match, char **match) struct ref **newtail = &newlist; struct ref *ref, *next; struct ref *fastarray[32]; + int match_pos; if (nr_match && !args.fetch_all) { if (ARRAY_SIZE(fastarray) < nr_match) @@ -540,6 +541,7 @@ static void filter_refs(struct ref **refs, int nr_match, char **match) else return_refs = NULL; + match_pos = 0; for (ref = *refs; ref; ref = next) { next = ref->next; if (!memcmp(ref->name, "refs/", 5) && @@ -553,15 +555,20 @@ static void filter_refs(struct ref **refs, int nr_match, char **match) continue; } else { - int i; - for (i = 0; i < nr_match; i++) { - if (!strcmp(ref->name, match[i])) { - match[i][0] = '\0'; - return_refs[i] = ref; + int cmp = -1; + while (match_pos < nr_match) { + cmp = strcmp(ref->name, match[match_pos]); + if (cmp < 0) /* definitely do not have it */ + break; + else if (cmp == 0) { /* definitely have it */ + match[match_pos][0] = '\0'; + return_refs[match_pos] = ref; break; } + else /* might have it; keep looking */ + match_pos++; } - if (i < nr_match) + if (!cmp) continue; /* we will link it later */ } free(ref); From 3d2a33e57faa84be3ab83a80c8b75dad3e747054 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 24 May 2012 02:04:51 -0400 Subject: [PATCH 6/6] fetch-pack: sort incoming heads list earlier Commit 4435968 started sorting heads fed to fetch-pack so that later commits could use more optimized algorithms; commit 7db8d53 switched the remove_duplicates function to such an algorithm. Of course, the sorting is more effective if you do it _before_ the algorithm in question. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fetch-pack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 8a724739ba..b18ba05bbe 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -1082,8 +1082,8 @@ struct ref *fetch_pack(struct fetch_pack_args *my_args, } if (heads && nr_heads) { - nr_heads = remove_duplicates(nr_heads, heads); qsort(heads, nr_heads, sizeof(*heads), compare_heads); + nr_heads = remove_duplicates(nr_heads, heads); } if (!ref) {