Browse Source
Provide macros to generate a type-specific treap implementation and various functions to operate on it. It uses obj_pool.h to store memory nodes in a treap. Previously committed nodes are never removed from the pool; after any *_commit operation, it is assumed (correctly, in the case of svn-fast-export) that someone else must care about them. Treaps provide a memory-efficient binary search tree structure. Insertion/deletion/search are about as fast in the average case as red-black trees and the chances of worst-case behavior are vanishingly small, thanks to (pseudo-)randomness. The bad worst-case behavior is a small price to pay, given that treaps are much simpler to implement. From http://www.canonware.com/download/trp/trp_hash/trp.h [db: Altered to reference nodes by offset from a common base pointer] [db: Bob Jenkins' hashing implementation dropped for Knuth's] [db: Methods unnecessary for search and insert dropped] [rr: Squelched compiler warnings] [db: Added support for immutable treap nodes] [jn: Reintroduced treap_nsearch(); with tests] Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> maint


7 changed files with 432 additions and 1 deletions
@ -0,0 +1,65 @@
@@ -0,0 +1,65 @@
|
||||
/* |
||||
* test-treap.c: code to exercise the svn importer's treap structure |
||||
*/ |
||||
|
||||
#include "cache.h" |
||||
#include "vcs-svn/obj_pool.h" |
||||
#include "vcs-svn/trp.h" |
||||
|
||||
struct int_node { |
||||
uintmax_t n; |
||||
struct trp_node children; |
||||
}; |
||||
|
||||
obj_pool_gen(node, struct int_node, 3) |
||||
|
||||
static int node_cmp(struct int_node *a, struct int_node *b) |
||||
{ |
||||
return (a->n > b->n) - (a->n < b->n); |
||||
} |
||||
|
||||
trp_gen(static, treap_, struct int_node, children, node, node_cmp) |
||||
|
||||
static void strtonode(struct int_node *item, const char *s) |
||||
{ |
||||
char *end; |
||||
item->n = strtoumax(s, &end, 10); |
||||
if (*s == '\0' || (*end != '\n' && *end != '\0')) |
||||
die("invalid integer: %s", s); |
||||
} |
||||
|
||||
int main(int argc, char *argv[]) |
||||
{ |
||||
struct strbuf sb = STRBUF_INIT; |
||||
struct trp_root root = { ~0 }; |
||||
uint32_t item; |
||||
|
||||
if (argc != 1) |
||||
usage("test-treap < ints"); |
||||
|
||||
while (strbuf_getline(&sb, stdin, '\n') != EOF) { |
||||
item = node_alloc(1); |
||||
strtonode(node_pointer(item), sb.buf); |
||||
treap_insert(&root, node_pointer(item)); |
||||
} |
||||
|
||||
item = node_offset(treap_first(&root)); |
||||
while (~item) { |
||||
uint32_t next; |
||||
struct int_node *tmp = node_pointer(node_alloc(1)); |
||||
|
||||
tmp->n = node_pointer(item)->n; |
||||
next = node_offset(treap_next(&root, node_pointer(item))); |
||||
|
||||
treap_remove(&root, node_pointer(item)); |
||||
item = node_offset(treap_nsearch(&root, tmp)); |
||||
|
||||
if (item != next && (!~item || node_pointer(item)->n != tmp->n)) |
||||
die("found %"PRIuMAX" in place of %"PRIuMAX"", |
||||
~item ? node_pointer(item)->n : ~(uintmax_t) 0, |
||||
~next ? node_pointer(next)->n : ~(uintmax_t) 0); |
||||
printf("%"PRIuMAX"\n", tmp->n); |
||||
} |
||||
node_reset(); |
||||
return 0; |
||||
} |
@ -0,0 +1,236 @@
@@ -0,0 +1,236 @@
|
||||
/* |
||||
* C macro implementation of treaps. |
||||
* |
||||
* Usage: |
||||
* #include <stdint.h> |
||||
* #include "trp.h" |
||||
* trp_gen(...) |
||||
* |
||||
* Licensed under a two-clause BSD-style license. |
||||
* See LICENSE for details. |
||||
*/ |
||||
|
||||
#ifndef TRP_H_ |
||||
#define TRP_H_ |
||||
|
||||
/* Attached to every function trp_gen() emits, so unused ones raise no warning. */
#define MAYBE_UNUSED __attribute__((__unused__))
||||
|
||||
/*
 * Node structure.
 * Children are stored as uint32_t offsets rather than pointers; an
 * all-ones offset (~0) marks a missing child.
 */
struct trp_node {
	uint32_t trpn_left;	/* offset of the left child, or ~0 */
	uint32_t trpn_right;	/* offset of the right child, or ~0 */
};
||||
|
||||
/*
 * Root structure.
 * Holds the offset of the topmost node; ~0 denotes an empty treap.
 */
struct trp_root {
	uint32_t trp_root;	/* offset of the root node, or ~0 */
};
||||
|
||||
/*
 * Pointer/Offset conversion.
 *
 * Nodes are referenced by uint32_t offsets; the a_base##_pointer and
 * a_base##_offset helpers of the backing pool translate between an
 * offset and a node pointer.
 */
#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset))
#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer))

/*
 * Make the node at a_offset writable.  A node whose offset lies below
 * a_base##_pool.committed is treated as immutable: a fresh node is
 * allocated with a_base##_alloc(1), the old contents are copied into it,
 * and a_offset (which must therefore be an lvalue) is redirected to the
 * copy.  Offsets at or above the committed mark are left untouched.
 *
 * No semicolon follows "while (0)": the caller supplies it, so the macro
 * expands to exactly one statement and stays safe in an unbraced if/else.
 */
#define trpn_modify(a_base, a_offset) \
	do { \
		if ((a_offset) < a_base##_pool.committed) { \
			uint32_t old_offset = (a_offset);\
			(a_offset) = a_base##_alloc(1); \
			*trpn_pointer(a_base, a_offset) = \
				*trpn_pointer(a_base, old_offset); \
		} \
	} while (0)
||||
|
||||
/*
 * Left accessors.
 * trp_left_get() expands to an lvalue for the left-child offset of node.
 * trp_left_set() first runs trpn_modify() on node, so node must be an
 * lvalue and may name a different offset afterwards.
 */
#define trp_left_get(pool, link, node) \
	(trpn_pointer(pool, node)->link.trpn_left)
#define trp_left_set(pool, link, node, child) \
	do { \
		trpn_modify(pool, node); \
		trpn_pointer(pool, node)->link.trpn_left = (child); \
	} while(0)
||||
|
||||
/*
 * Right accessors.
 * trp_right_get() expands to an lvalue for the right-child offset of node.
 * trp_right_set() first runs trpn_modify() on node, so node must be an
 * lvalue and may name a different offset afterwards.
 */
#define trp_right_get(pool, link, node) \
	(trpn_pointer(pool, node)->link.trpn_right)
#define trp_right_set(pool, link, node, child) \
	do { \
		trpn_modify(pool, node); \
		trpn_pointer(pool, node)->link.trpn_right = (child); \
	} while(0)
||||
|
||||
/*
 * Fibonacci hash function.
 * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2).
 * See Knuth §6.4: volume 3, 3rd ed, p518.
 *
 * The whole expansion is parenthesized so that the cast cannot be
 * separated from its operand by the operators surrounding a use.
 */
#define trpn_hash(a_node) ((uint32_t) (2654435761u * (a_node)))

/* Priority accessors: a node's priority is the hash of its offset. */
#define trp_prio_get(a_node) (trpn_hash(a_node))
||||
|
||||
/*
 * Node initializer: mark both child links of node as absent (~0).
 * The links are written through the setters, so node must be an lvalue.
 */
#define trp_node_new(pool, link, node) \
	do { \
		trp_left_set(pool, link, (node), ~0); \
		trp_right_set(pool, link, (node), ~0); \
	} while(0)
||||
|
||||
/*
 * Internal utility macros.
 *
 * trpn_first: store in out the offset of the leftmost node of the subtree
 * whose root offset is subtree.  NOTE: when subtree is ~0 this macro
 * executes "return NULL" in the *enclosing function*, so it is only
 * usable inside functions that return a pointer.
 */
#define trpn_first(pool, link, subtree, out) \
	do { \
		uint32_t trpn_first_left_; \
		(out) = (subtree); \
		if ((out) == ~0) \
			return NULL; \
		for (;;) { \
			trpn_first_left_ = trp_left_get(pool, link, (out)); \
			if (!~trpn_first_left_) \
				break; \
			(out) = trpn_first_left_; \
		} \
	} while (0)
||||
|
||||
/*
 * Rotate the subtree rooted at top to the left: its right child is
 * promoted (offset stored in promoted), top becomes that child's left
 * child, and the child's former left subtree becomes top's right subtree.
 * Both top and promoted go through the setters and must be lvalues.
 */
#define trpn_rotate_left(pool, link, top, promoted) \
	do { \
		uint32_t trpn_rotl_moved_; \
		(promoted) = trp_right_get(pool, link, (top)); \
		trpn_rotl_moved_ = trp_left_get(pool, link, (promoted)); \
		trp_right_set(pool, link, (top), trpn_rotl_moved_); \
		trp_left_set(pool, link, (promoted), (top)); \
	} while(0)
||||
|
||||
/*
 * Rotate the subtree rooted at top to the right: its left child is
 * promoted (offset stored in promoted), top becomes that child's right
 * child, and the child's former right subtree becomes top's left subtree.
 * Both top and promoted go through the setters and must be lvalues.
 */
#define trpn_rotate_right(pool, link, top, promoted) \
	do { \
		uint32_t trpn_rotr_moved_; \
		(promoted) = trp_left_get(pool, link, (top)); \
		trpn_rotr_moved_ = trp_right_get(pool, link, (promoted)); \
		trp_left_set(pool, link, (top), trpn_rotr_moved_); \
		trp_right_set(pool, link, (promoted), (top)); \
	} while(0)
||||
|
||||
/*
 * trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp)
 *
 * Emit a family of treap functions for nodes of type a_type that live in
 * the a_base object pool:
 *
 *   a_pre##first, a_pre##next      in-order iteration
 *   a_pre##search                  exact lookup
 *   a_pre##nsearch                 exact lookup, else the key's successor
 *   a_pre##insert, a_pre##remove   structural updates (plus their
 *                                  *_recurse helpers)
 *
 * a_attr is the storage class of the generated functions, a_field names
 * the struct trp_node member of a_type, and a_cmp(a, b) orders two a_type
 * pointers by returning a negative, zero or positive int.
 *
 * Functions that yield "no node" return trpn_pointer(a_base, ~0); this
 * presumably maps to NULL in the pool implementation -- confirm against
 * obj_pool.h.  next() and remove() dereference nodes while descending and
 * therefore require the given node to be present in the treap.
 */
#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \
a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \
{ \
	uint32_t ret; \
	/* trpn_first returns NULL from here when the treap is empty. */ \
	trpn_first(a_base, a_field, treap->trp_root, ret); \
	return trpn_pointer(a_base, ret); \
} \
a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \
{ \
	uint32_t ret; \
	uint32_t offset = trpn_offset(a_base, node); \
	if (~trp_right_get(a_base, a_field, offset)) { \
		/* Successor is the leftmost node of the right subtree. */ \
		trpn_first(a_base, a_field, \
			trp_right_get(a_base, a_field, offset), ret); \
	} else { \
		/* Otherwise it is the last ancestor reached by a left turn. */ \
		uint32_t tnode = treap->trp_root; \
		ret = ~0; \
		while (1) { \
			int cmp = (a_cmp)(trpn_pointer(a_base, offset), \
				trpn_pointer(a_base, tnode)); \
			if (cmp < 0) { \
				ret = tnode; \
				tnode = trp_left_get(a_base, a_field, tnode); \
			} else if (cmp > 0) { \
				tnode = trp_right_get(a_base, a_field, tnode); \
			} else { \
				break; \
			} \
		} \
	} \
	return trpn_pointer(a_base, ret); \
} \
a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \
{ \
	int cmp; \
	uint32_t ret = treap->trp_root; \
	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \
		if (cmp < 0) { \
			ret = trp_left_get(a_base, a_field, ret); \
		} else { \
			ret = trp_right_get(a_base, a_field, ret); \
		} \
	} \
	return trpn_pointer(a_base, ret); \
} \
a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \
{ \
	/* \
	 * ret tracks the best candidate so far: the most recent node that \
	 * compared greater than key.  Remembering it lets the search fall \
	 * back to an ancestor when the descent ends by stepping right off \
	 * a leaf, instead of reporting "no successor". \
	 */ \
	uint32_t ret = ~0; \
	uint32_t tnode = treap->trp_root; \
	while (~tnode) { \
		int cmp = (a_cmp)(key, trpn_pointer(a_base, tnode)); \
		if (cmp < 0) { \
			ret = tnode; \
			tnode = trp_left_get(a_base, a_field, tnode); \
		} else if (cmp > 0) { \
			tnode = trp_right_get(a_base, a_field, tnode); \
		} else { \
			ret = tnode; \
			break; \
		} \
	} \
	return trpn_pointer(a_base, ret); \
} \
a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \
{ \
	if (cur_node == ~0) { \
		return (ins_node); \
	} else { \
		uint32_t ret; \
		int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \
					trpn_pointer(a_base, cur_node)); \
		if (cmp < 0) { \
			uint32_t left = a_pre##insert_recurse( \
				trp_left_get(a_base, a_field, cur_node), ins_node); \
			trp_left_set(a_base, a_field, cur_node, left); \
			/* Restore heap order: smaller priority moves up. */ \
			if (trp_prio_get(left) < trp_prio_get(cur_node)) \
				trpn_rotate_right(a_base, a_field, cur_node, ret); \
			else \
				ret = cur_node; \
		} else { \
			/* Keys comparing equal are placed in the right subtree. */ \
			uint32_t right = a_pre##insert_recurse( \
				trp_right_get(a_base, a_field, cur_node), ins_node); \
			trp_right_set(a_base, a_field, cur_node, right); \
			if (trp_prio_get(right) < trp_prio_get(cur_node)) \
				trpn_rotate_left(a_base, a_field, cur_node, ret); \
			else \
				ret = cur_node; \
		} \
		return (ret); \
	} \
} \
a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \
{ \
	uint32_t offset = trpn_offset(a_base, node); \
	trp_node_new(a_base, a_field, offset); \
	treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \
} \
a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \
{ \
	int cmp = (a_cmp)(trpn_pointer(a_base, rem_node), \
				trpn_pointer(a_base, cur_node)); \
	if (cmp == 0) { \
		/* Rotate the match downwards until it is a leaf, then drop it. */ \
		uint32_t ret; \
		uint32_t left = trp_left_get(a_base, a_field, cur_node); \
		uint32_t right = trp_right_get(a_base, a_field, cur_node); \
		if (left == ~0) { \
			if (right == ~0) \
				return (~0); \
		} else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \
			trpn_rotate_right(a_base, a_field, cur_node, ret); \
			right = a_pre##remove_recurse(cur_node, rem_node); \
			trp_right_set(a_base, a_field, ret, right); \
			return (ret); \
		} \
		trpn_rotate_left(a_base, a_field, cur_node, ret); \
		left = a_pre##remove_recurse(cur_node, rem_node); \
		trp_left_set(a_base, a_field, ret, left); \
		return (ret); \
	} else if (cmp < 0) { \
		uint32_t left = a_pre##remove_recurse( \
			trp_left_get(a_base, a_field, cur_node), rem_node); \
		trp_left_set(a_base, a_field, cur_node, left); \
		return (cur_node); \
	} else { \
		uint32_t right = a_pre##remove_recurse( \
			trp_right_get(a_base, a_field, cur_node), rem_node); \
		trp_right_set(a_base, a_field, cur_node, right); \
		return (cur_node); \
	} \
} \
a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \
{ \
	treap->trp_root = a_pre##remove_recurse(treap->trp_root, \
		trpn_offset(a_base, node)); \
}
||||
|
||||
#endif |
@ -0,0 +1,103 @@
@@ -0,0 +1,103 @@
|
||||
Motivation |
||||
========== |
||||
|
||||
Treaps provide a memory-efficient binary search tree structure. |
||||
Insertion/deletion/search are about as fast in the average |
||||
case as red-black trees and the chances of worst-case behavior are |
||||
vanishingly small, thanks to (pseudo-)randomness. The bad worst-case |
||||
behavior is a small price to pay, given that treaps are much simpler |
||||
to implement. |
||||
|
||||
API |
||||
=== |
||||
|
||||
The trp API generates a data structure and functions to handle a |
||||
large growing set of objects stored in a pool. |
||||
|
||||
The caller: |
||||
|
||||
. Specifies parameters for the generated functions with the |
||||
trp_gen(static, foo_, ...) macro. |
||||
|
||||
. Allocates a `struct trp_root` variable and sets it to {~0}. |
||||
|
||||
. Adds new nodes to the set using `foo_insert`. |
||||
|
||||
. Can find a specific item in the set using `foo_search`. |
||||
|
||||
. Can iterate over items in the set using `foo_first` and `foo_next`. |
||||
|
||||
. Can remove an item from the set using `foo_remove`. |
||||
|
||||
Example: |
||||
|
||||
---- |
||||
struct ex_node { |
||||
const char *s; |
||||
struct trp_node ex_link; |
||||
}; |
||||
static struct trp_root ex_base = {~0}; |
||||
obj_pool_gen(ex, struct ex_node, 4096); |
||||
trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) |
||||
struct ex_node *item; |
||||
|
||||
item = ex_pointer(ex_alloc(1)); |
||||
item->s = "hello"; |
||||
ex_insert(&ex_base, item); |
||||
item = ex_pointer(ex_alloc(1)); |
||||
item->s = "goodbye"; |
||||
ex_insert(&ex_base, item); |
||||
for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) |
||||
printf("%s\n", item->s); |
||||
---- |
||||
|
||||
Functions |
||||
--------- |
||||
|
||||
trp_gen(attr, foo_, node_type, link_field, pool, cmp):: |
||||
|
||||
Generate a type-specific treap implementation. |
||||
+ |
||||
. The storage class for generated functions will be 'attr' (e.g., `static`). |
||||
. Generated function names are prefixed with 'foo_' (e.g., `treap_`). |
||||
. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). |
||||
This type must be a struct with at least one `struct trp_node` field |
||||
to point to its children. |
||||
. The field used to access child nodes will be 'link_field'. |
||||
. All treap nodes must lie in the 'pool' object pool. |
||||
. Treap nodes must be totally ordered by the 'cmp' relation, with the |
||||
following prototype: |
||||
+ |
||||
int (*cmp)(node_type \*a, node_type \*b) |
||||
+ |
||||
and returning a value less than, equal to, or greater than zero |
||||
according to the result of comparison. |
||||
|
||||
void foo_insert(struct trp_root *treap, node_type \*node):: |
||||
|
||||
Insert node into treap. If inserted multiple times, |
||||
a node will appear in the treap multiple times. |
||||
|
||||
void foo_remove(struct trp_root *treap, node_type \*node):: |
||||
|
||||
Remove node from treap. Caller must ensure node is |
||||
present in treap before using this function. |
||||
|
||||
node_type *foo_search(struct trp_root \*treap, node_type \*key):: |
||||
|
||||
Search for a node that matches key. If no match is found, |
||||
result is NULL. |
||||
|
||||
node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: |
||||
|
||||
Like `foo_search`, but if the key is missing, return what |
||||
would be key's successor, were key in treap (NULL if no |
||||
successor). |
||||
|
||||
node_type *foo_first(struct trp_root \*treap):: |
||||
|
||||
Find the first item from the treap, in sorted order. |
||||
|
||||
node_type *foo_next(struct trp_root \*treap, node_type \*node):: |
||||
|
||||
Find the next item. |
Loading…
Reference in new issue