Add treap implementation

Provide macros to generate a type-specific treap implementation and
various functions to operate on it. It uses obj_pool.h to store memory
nodes in a treap.  Previously committed nodes are never removed from
the pool; after any *_commit operation, it is assumed (correctly, in
the case of svn-fast-export) that someone else must care about them.

Treaps provide a memory-efficient binary search tree structure.
Insertion/deletion/search are about as fast in the average
case as red-black trees and the chances of worst-case behavior are
vanishingly small, thanks to (pseudo-)randomness.  The bad worst-case
behavior is a small price to pay, given that treaps are much simpler
to implement.

From http://www.canonware.com/download/trp/trp_hash/trp.h

[db: Altered to reference nodes by offset from a common base pointer]
[db: Bob Jenkins' hashing implementation dropped for Knuth's]
[db: Methods unnecessary for search and insert dropped]
[rr: Squelched compiler warnings]
[db: Added support for immutable treap nodes]
[jn: Reintroduced treap_nsearch(); with tests]

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
Jason Evans 2010-08-09 17:17:34 -05:00 committed by Junio C Hamano
parent 4709455db3
commit 951f316470
7 changed files with 432 additions and 1 deletions

1
.gitignore vendored
View File

@ -173,6 +173,7 @@
/test-run-command /test-run-command
/test-sha1 /test-sha1
/test-sigchain /test-sigchain
/test-treap
/common-cmds.h /common-cmds.h
*.tar.gz *.tar.gz
*.dsc *.dsc

View File

@ -415,6 +415,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils
TEST_PROGRAMS_NEED_X += test-run-command TEST_PROGRAMS_NEED_X += test-run-command
TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sha1
TEST_PROGRAMS_NEED_X += test-sigchain TEST_PROGRAMS_NEED_X += test-sigchain
TEST_PROGRAMS_NEED_X += test-treap
TEST_PROGRAMS_NEED_X += test-index-version TEST_PROGRAMS_NEED_X += test-index-version


TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X)) TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X))
@ -1866,7 +1867,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h


$(VCSSVN_OBJS): \ $(VCSSVN_OBJS): \
vcs-svn/obj_pool.h vcs-svn/obj_pool.h vcs-svn/trp.h
endif endif


exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \

View File

@ -76,4 +76,26 @@ test_expect_success 'obj pool: high-water mark' '
test_cmp expected actual test_cmp expected actual
' '


# Feed test-treap a list of integers containing duplicates and check that
# it prints them back in sorted order.  The expected output is produced by
# plain `sort`, which compares lines as text; that matches numeric order
# here only because every input value has exactly two digits.
test_expect_success 'treap sort' '
cat <<-\EOF >unsorted &&
68
12
13
13
68
13
13
21
10
11
12
13
13
EOF
sort unsorted >expected &&

test-treap <unsorted >actual &&
test_cmp expected actual
'

test_done test_done

65
test-treap.c Normal file
View File

@ -0,0 +1,65 @@
/*
* test-treap.c: code to exercise the svn importer's treap structure
*/

#include "cache.h"
#include "vcs-svn/obj_pool.h"
#include "vcs-svn/trp.h"

/* Treap element used by this test: an integer key plus its treap links. */
struct int_node {
/* Sort key; parsed from one input line by strtonode(). */
uintmax_t n;
/* Left/right child offsets, maintained by the generated treap_* code. */
struct trp_node children;
};

/*
 * Generate the node_* object-pool helpers for struct int_node; this file
 * uses node_alloc, node_pointer, node_offset and node_reset.
 * NOTE(review): the third argument (3) is presumably the initial pool
 * capacity -- confirm against vcs-svn/obj_pool.h.
 */
obj_pool_gen(node, struct int_node, 3)

/*
 * Three-way comparison of two nodes by their integer key.
 * Returns -1, 0 or 1 when a's key is less than, equal to or greater
 * than b's key.
 */
static int node_cmp(struct int_node *a, struct int_node *b)
{
	if (a->n < b->n)
		return -1;
	if (a->n > b->n)
		return 1;
	return 0;
}

/*
 * Generate static treap_* functions (treap_first, treap_next,
 * treap_nsearch, treap_insert, treap_remove, ...) for struct int_node
 * elements stored in the "node" pool, linked through the "children"
 * member and ordered by node_cmp.
 */
trp_gen(static, treap_, struct int_node, children, node, node_cmp)

/*
 * Parse the decimal integer at the start of s into item->n.
 *
 * Dies with "invalid integer" unless strtoumax() converted at least one
 * digit and the number is followed by a newline or the end of the string.
 * Leading whitespace and a sign are accepted, as strtoumax() allows them.
 */
static void strtonode(struct int_node *item, const char *s)
{
	char *end;

	item->n = strtoumax(s, &end, 10);
	/*
	 * strtoumax() leaves end == s when nothing was converted.  Checking
	 * only for an empty string would let input such as "\n" slip through
	 * as 0, because end would then point at the newline.
	 */
	if (end == s || (*end != '\n' && *end != '\0'))
		die("invalid integer: %s", s);
}

/*
 * Read one integer per line from stdin, insert each into a treap, then
 * drain the treap from the smallest element upwards, printing every value
 * on its own line.  While draining, the node found by treap_nsearch() is
 * cross-checked against the one reported by treap_next().
 *
 * Takes no command-line arguments; prints usage otherwise.
 */
int main(int argc, char *argv[])
{
struct strbuf sb = STRBUF_INIT;
/* ~0 is the "no node" offset, i.e. an empty treap. */
struct trp_root root = { ~0 };
uint32_t item;

if (argc != 1)
usage("test-treap < ints");

/* Build phase: one freshly allocated pool node per input line. */
while (strbuf_getline(&sb, stdin, '\n') != EOF) {
item = node_alloc(1);
strtonode(node_pointer(item), sb.buf);
treap_insert(&root, node_pointer(item));
}

/* Drain phase: start at the smallest element; ~item is 0 once item is ~0. */
item = node_offset(treap_first(&root));
while (~item) {
uint32_t next;
/* Scratch node holding a copy of the key, used as the search key below. */
struct int_node *tmp = node_pointer(node_alloc(1));

tmp->n = node_pointer(item)->n;
/* Record the successor before the current node is unlinked. */
next = node_offset(treap_next(&root, node_pointer(item)));

treap_remove(&root, node_pointer(item));
/* Look up the removed key again to find the node to visit next. */
item = node_offset(treap_nsearch(&root, tmp));

/*
 * The lookup must agree with treap_next(), except that it may land
 * on a different node carrying the same value (a duplicate).
 */
if (item != next && (!~item || node_pointer(item)->n != tmp->n))
die("found %"PRIuMAX" in place of %"PRIuMAX"",
~item ? node_pointer(item)->n : ~(uintmax_t) 0,
~next ? node_pointer(next)->n : ~(uintmax_t) 0);
printf("%"PRIuMAX"\n", tmp->n);
}
node_reset();
return 0;
}

View File

@ -1,6 +1,9 @@
Copyright (C) 2010 David Barr <david.barr@cordelta.com>. Copyright (C) 2010 David Barr <david.barr@cordelta.com>.
All rights reserved. All rights reserved.


Copyright (C) 2008 Jason Evans <jasone@canonware.com>.
All rights reserved.

Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions modification, are permitted provided that the following conditions
are met: are met:

236
vcs-svn/trp.h Normal file
View File

@ -0,0 +1,236 @@
/*
* C macro implementation of treaps.
*
* Usage:
* #include <stdint.h>
* #include "trp.h"
* trp_gen(...)
*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/

#ifndef TRP_H_
#define TRP_H_

/*
 * Attached to every function generated by trp_gen() so that functions a
 * caller never uses do not produce "unused" warnings (GCC-style attribute).
 */
#define MAYBE_UNUSED __attribute__((__unused__))

/*
 * Node structure: the links embedded in every treap element.
 * Children are referenced by uint32_t offset (resolved through the
 * pool's pointer/offset helpers) rather than by pointer; ~0 means
 * "no child".
 */
struct trp_node {
uint32_t trpn_left;
uint32_t trpn_right;
};

/*
 * Root structure: offset of the treap's root node.
 * Initialize to { ~0 } for an empty treap.
 */
struct trp_root {
uint32_t trp_root;
};

/*
 * Pointer/Offset conversion.
 * These forward to the a_base##_pointer() and a_base##_offset() helpers
 * of the object pool named a_base.
 */
#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset))
#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer))
/*
 * Prepare the node at a_offset for modification.
 *
 * Nodes below the pool's committed mark are treated as immutable: when
 * a_offset refers to one, a fresh node is allocated, the old contents are
 * copied into it, and a_offset is updated to refer to the copy.  Otherwise
 * nothing happens.
 *
 * a_offset must be a modifiable lvalue and is evaluated more than once.
 * The expansion has no trailing semicolon, so the macro behaves like a
 * single statement (e.g. in an unbraced if/else).
 */
#define trpn_modify(a_base, a_offset) \
	do { \
		if ((a_offset) < a_base##_pool.committed) { \
			uint32_t old_offset = (a_offset); \
			(a_offset) = a_base##_alloc(1); \
			*trpn_pointer(a_base, a_offset) = \
				*trpn_pointer(a_base, old_offset); \
		} \
	} while (0)

/*
 * Left accessors.
 * trp_left_get yields the left-child offset of a_node as an lvalue.
 * trp_left_set first runs trpn_modify on a_node, which may replace a_node
 * with the offset of a fresh copy, and then stores a_left; a_node must
 * therefore be a modifiable lvalue.
 */
#define trp_left_get(a_base, a_field, a_node) \
(trpn_pointer(a_base, a_node)->a_field.trpn_left)
#define trp_left_set(a_base, a_field, a_node, a_left) \
do { \
trpn_modify(a_base, a_node); \
trp_left_get(a_base, a_field, a_node) = (a_left); \
} while(0)

/*
 * Right accessors.
 * trp_right_get yields the right-child offset of a_node as an lvalue.
 * trp_right_set first runs trpn_modify on a_node, which may replace a_node
 * with the offset of a fresh copy, and then stores a_right; a_node must
 * therefore be a modifiable lvalue.
 */
#define trp_right_get(a_base, a_field, a_node) \
(trpn_pointer(a_base, a_node)->a_field.trpn_right)
#define trp_right_set(a_base, a_field, a_node, a_right) \
do { \
trpn_modify(a_base, a_node); \
trp_right_get(a_base, a_field, a_node) = (a_right); \
} while(0)

/*
 * Fibonacci hash function.
 * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2).
 * See Knuth §6.4: volume 3, 3rd ed, p518.
 *
 * The whole expansion is parenthesized so that it is a single operand
 * wherever it is used (e.g. after sizeof or before a postfix operator).
 */
#define trpn_hash(a_node) ((uint32_t) (2654435761u * (a_node)))

/*
 * Priority accessors.
 * A node's heap priority is the hash of its pool offset; nothing is
 * stored in the node itself.
 */
#define trp_prio_get(a_node) trpn_hash(a_node)

/*
 * Node initializer.
 * Sets both child links of a_node to ~0 ("no child").  Goes through the
 * trp_*_set macros, so a_node must be a modifiable lvalue and may be
 * replaced by the offset of a fresh copy.
 */
#define trp_node_new(a_base, a_field, a_node) \
do { \
trp_left_set(a_base, a_field, (a_node), ~0); \
trp_right_set(a_base, a_field, (a_node), ~0); \
} while(0)

/* Internal utility macros. */

/*
 * Store in r_node the offset of the leftmost node of the subtree rooted
 * at a_root.
 *
 * Beware: when a_root is ~0 this expands to "return NULL" in the calling
 * function, so it can only be used inside functions that return a pointer.
 */
#define trpn_first(a_base, a_field, a_root, r_node) \
do { \
(r_node) = (a_root); \
if ((r_node) == ~0) \
return NULL; \
while (~trp_left_get(a_base, a_field, (r_node))) \
(r_node) = trp_left_get(a_base, a_field, (r_node)); \
} while (0)

/*
 * Rotate left around a_node: a_node's right child (stored in r_node)
 * takes a_node's place, a_node becomes r_node's left child, and r_node's
 * former left subtree becomes a_node's right subtree.
 * Both a_node and r_node must be modifiable lvalues; either may be
 * replaced by the offset of a fresh copy by the trp_*_set macros.
 */
#define trpn_rotate_left(a_base, a_field, a_node, r_node) \
do { \
(r_node) = trp_right_get(a_base, a_field, (a_node)); \
trp_right_set(a_base, a_field, (a_node), \
trp_left_get(a_base, a_field, (r_node))); \
trp_left_set(a_base, a_field, (r_node), (a_node)); \
} while(0)

/*
 * Rotate right around a_node: the mirror image of trpn_rotate_left.
 * a_node's left child (stored in r_node) takes a_node's place, a_node
 * becomes r_node's right child, and r_node's former right subtree becomes
 * a_node's left subtree.
 */
#define trpn_rotate_right(a_base, a_field, a_node, r_node) \
do { \
(r_node) = trp_left_get(a_base, a_field, (a_node)); \
trp_left_set(a_base, a_field, (a_node), \
trp_right_get(a_base, a_field, (r_node))); \
trp_right_set(a_base, a_field, (r_node), (a_node)); \
} while(0)

/*
 * trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp)
 *
 * Generate a type-specific treap implementation:
 *
 *   a_attr   storage class of the generated functions (e.g. static)
 *   a_pre    prefix of the generated function names
 *   a_type   element type; must contain a struct trp_node member
 *   a_field  name of that struct trp_node member
 *   a_base   name of the object pool holding the elements
 *   a_cmp    int cmp(a_type *a, a_type *b), returning <0, 0 or >0;
 *            always invoked as (a_cmp)(...), so it must be a function
 *
 * Generated functions:
 *
 *   a_pre##first(treap)         smallest element, NULL if treap is empty
 *   a_pre##next(treap, node)    element following node, NULL at the end
 *   a_pre##search(treap, key)   an element equal to key, or NULL
 *   a_pre##nsearch(treap, key)  an element equal to key if there is one,
 *                               otherwise the smallest element greater
 *                               than key, or NULL if there is none
 *   a_pre##insert(treap, node)  link node into treap
 *   a_pre##remove(treap, node)  unlink node; node must be present
 *
 * Heap priorities come from trp_prio_get() applied to node offsets.
 * The "not found" results rely on trpn_pointer(a_base, ~0) being NULL.
 *
 * With equal keys, next() stops its walk from the root at the first
 * element comparing equal to node, and remove() unlinks the first equal
 * element it reaches, which need not be node itself.
 */
#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \
a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \
{ \
	uint32_t ret; \
	trpn_first(a_base, a_field, treap->trp_root, ret); \
	return trpn_pointer(a_base, ret); \
} \
a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \
{ \
	uint32_t ret; \
	uint32_t offset = trpn_offset(a_base, node); \
	if (~trp_right_get(a_base, a_field, offset)) { \
		/* Successor is the leftmost node of the right subtree. */ \
		trpn_first(a_base, a_field, \
			trp_right_get(a_base, a_field, offset), ret); \
	} else { \
		/* Otherwise it is the last ancestor entered by going left. */ \
		uint32_t tnode = treap->trp_root; \
		ret = ~0; \
		while (1) { \
			int cmp = (a_cmp)(trpn_pointer(a_base, offset), \
				trpn_pointer(a_base, tnode)); \
			if (cmp < 0) { \
				ret = tnode; \
				tnode = trp_left_get(a_base, a_field, tnode); \
			} else if (cmp > 0) { \
				tnode = trp_right_get(a_base, a_field, tnode); \
			} else { \
				break; \
			} \
		} \
	} \
	return trpn_pointer(a_base, ret); \
} \
a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \
{ \
	int cmp; \
	uint32_t ret = treap->trp_root; \
	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \
		if (cmp < 0) { \
			ret = trp_left_get(a_base, a_field, ret); \
		} else { \
			ret = trp_right_get(a_base, a_field, ret); \
		} \
	} \
	return trpn_pointer(a_base, ret); \
} \
a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \
{ \
	uint32_t cur = treap->trp_root; \
	uint32_t ret = ~0; \
	while (~cur) { \
		int cmp = (a_cmp)(key, trpn_pointer(a_base, cur)); \
		if (cmp < 0) { \
			/* cur is greater than key: best successor so far. */ \
			ret = cur; \
			cur = trp_left_get(a_base, a_field, cur); \
		} else if (cmp > 0) { \
			/* Keep ret: running off the right edge must not lose it. */ \
			cur = trp_right_get(a_base, a_field, cur); \
		} else { \
			ret = cur; \
			break; \
		} \
	} \
	return trpn_pointer(a_base, ret); \
} \
a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \
{ \
	if (cur_node == ~0) { \
		return (ins_node); \
	} else { \
		uint32_t ret; \
		int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \
			trpn_pointer(a_base, cur_node)); \
		if (cmp < 0) { \
			uint32_t left = a_pre##insert_recurse( \
				trp_left_get(a_base, a_field, cur_node), ins_node); \
			trp_left_set(a_base, a_field, cur_node, left); \
			if (trp_prio_get(left) < trp_prio_get(cur_node)) \
				trpn_rotate_right(a_base, a_field, cur_node, ret); \
			else \
				ret = cur_node; \
		} else { \
			/* Equal keys go to the right of existing elements. */ \
			uint32_t right = a_pre##insert_recurse( \
				trp_right_get(a_base, a_field, cur_node), ins_node); \
			trp_right_set(a_base, a_field, cur_node, right); \
			if (trp_prio_get(right) < trp_prio_get(cur_node)) \
				trpn_rotate_left(a_base, a_field, cur_node, ret); \
			else \
				ret = cur_node; \
		} \
		return (ret); \
	} \
} \
a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \
{ \
	uint32_t offset = trpn_offset(a_base, node); \
	trp_node_new(a_base, a_field, offset); \
	treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \
} \
a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \
{ \
	int cmp = (a_cmp)(trpn_pointer(a_base, rem_node), \
		trpn_pointer(a_base, cur_node)); \
	if (cmp == 0) { \
		/* Rotate the doomed node downwards until it is a leaf. */ \
		uint32_t ret; \
		uint32_t left = trp_left_get(a_base, a_field, cur_node); \
		uint32_t right = trp_right_get(a_base, a_field, cur_node); \
		if (left == ~0) { \
			if (right == ~0) \
				return (~0); \
		} else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \
			trpn_rotate_right(a_base, a_field, cur_node, ret); \
			right = a_pre##remove_recurse(cur_node, rem_node); \
			trp_right_set(a_base, a_field, ret, right); \
			return (ret); \
		} \
		trpn_rotate_left(a_base, a_field, cur_node, ret); \
		left = a_pre##remove_recurse(cur_node, rem_node); \
		trp_left_set(a_base, a_field, ret, left); \
		return (ret); \
	} else if (cmp < 0) { \
		uint32_t left = a_pre##remove_recurse( \
			trp_left_get(a_base, a_field, cur_node), rem_node); \
		trp_left_set(a_base, a_field, cur_node, left); \
		return (cur_node); \
	} else { \
		uint32_t right = a_pre##remove_recurse( \
			trp_right_get(a_base, a_field, cur_node), rem_node); \
		trp_right_set(a_base, a_field, cur_node, right); \
		return (cur_node); \
	} \
} \
a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \
{ \
	treap->trp_root = a_pre##remove_recurse(treap->trp_root, \
		trpn_offset(a_base, node)); \
}

#endif

103
vcs-svn/trp.txt Normal file
View File

@ -0,0 +1,103 @@
Motivation
==========

Treaps provide a memory-efficient binary search tree structure.
Insertion/deletion/search are about as fast in the average
case as red-black trees and the chances of worst-case behavior are
vanishingly small, thanks to (pseudo-)randomness. The bad worst-case
behavior is a small price to pay, given that treaps are much simpler
to implement.

API
===

The trp API generates a data structure and functions to handle a
large growing set of objects stored in a pool.

The caller:

. Specifies parameters for the generated functions with the
trp_gen(static, foo_, ...) macro.

. Allocates a `struct trp_root` variable and sets it to {~0}.

. Adds new nodes to the set using `foo_insert`.

. Can find a specific item in the set using `foo_search`.

. Can iterate over items in the set using `foo_first` and `foo_next`.

. Can remove an item from the set using `foo_remove`.

Example:

----
struct ex_node {
const char *s;
struct trp_node ex_link;
};
static struct trp_root ex_base = {~0};
obj_pool_gen(ex, struct ex_node, 4096);
trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp)
struct ex_node *item;

item = ex_pointer(ex_alloc(1));
item->s = "hello";
ex_insert(&ex_base, item);
item = ex_pointer(ex_alloc(1));
item->s = "goodbye";
ex_insert(&ex_base, item);
for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item))
printf("%s\n", item->s);
----

Functions
---------

trp_gen(attr, foo_, node_type, link_field, pool, cmp)::

Generate a type-specific treap implementation.
+
. The storage class for generated functions will be 'attr' (e.g., `static`).
. Generated function names are prefixed with 'foo_' (e.g., `treap_`).
. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`).
This type must be a struct with at least one `struct trp_node` field
to point to its children.
. The field used to access child nodes will be 'link_field'.
. All treap nodes must lie in the 'pool' object pool.
. Treap nodes must be totally ordered by the 'cmp' relation, with the
following prototype:
+
int (*cmp)(node_type \*a, node_type \*b)
+
and returning a value less than, equal to, or greater than zero
according to the result of comparison.

void foo_insert(struct trp_root *treap, node_type \*node)::

Insert node into treap. If inserted multiple times,
a node will appear in the treap multiple times.

void foo_remove(struct trp_root *treap, node_type \*node)::

Remove node from treap. Caller must ensure node is
present in treap before using this function.

node_type *foo_search(struct trp_root \*treap, node_type \*key)::

Search for a node that matches key. If no match is found,
result is NULL.

node_type *foo_nsearch(struct trp_root \*treap, node_type \*key)::

Like `foo_search`, but if the key is missing return what
would be key's successor, were key in treap (NULL if no
successor).

node_type *foo_first(struct trp_root \*treap)::

Find the first item from the treap, in sorted order.

node_type *foo_next(struct trp_root \*treap, node_type \*node)::

Find the next item.