Browse Source
Intern strings so they can be compared by address and stored without wasting space. This library uses the macros in obj_pool.h and trp.h to create a memory pool for strings and expose an API for handling them. [rr: added API docs] [jn: with some API simplifications, new documentation and tests] Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> maint
David Barr
14 years ago
committed by
Junio C Hamano
7 changed files with 210 additions and 3 deletions
@ -0,0 +1,31 @@
@@ -0,0 +1,31 @@
|
||||
/* |
||||
* test-string-pool.c: code to exercise the svn importer's string pool |
||||
*/ |
||||
|
||||
#include "git-compat-util.h" |
||||
#include "vcs-svn/string_pool.h" |
||||
|
||||
/*
 * Test driver for the string pool.
 *
 * Splits argv[1] on ',' or '-' and requires exactly two tokens.  The
 * two token keys are then printed with a verb between them: "equals"
 * when both tokens interned to the same key, "does not equal"
 * otherwise.  The pool is reset before returning 0.
 */
int main(int argc, char *argv[])
{
	/* Intern the two verbs up front, in the same order as before. */
	const uint32_t differs_key = pool_intern("does not equal");
	const uint32_t same_key = pool_intern("equals");
	uint32_t keys[3];
	uint32_t count;

	if (argc != 2)
		usage("test-string-pool <string>,<string>");

	/* Room for two tokens plus the ~0 terminator. */
	count = pool_tok_seq(3, keys, ",-", argv[1]);
	if (count >= 3)
		die("too many strings");
	else if (count <= 1)
		die("too few strings");

	/* Move the second token to the end and slot the verb in between. */
	keys[2] = keys[1];
	if (keys[0] == keys[2])
		keys[1] = same_key;
	else
		keys[1] = differs_key;

	pool_print_seq(3, keys, ' ', stdout);
	fputc('\n', stdout);

	pool_reset();
	return 0;
}
@ -0,0 +1,102 @@
@@ -0,0 +1,102 @@
|
||||
/* |
||||
* Licensed under a two-clause BSD-style license. |
||||
* See LICENSE for details. |
||||
*/ |
||||
|
||||
#include "git-compat-util.h" |
||||
#include "trp.h" |
||||
#include "obj_pool.h" |
||||
#include "string_pool.h" |
||||
|
||||
/*
 * Root of the treap that indexes the interned strings.  It starts out
 * holding ~0, the value this file uses throughout for "no entry".
 */
static struct trp_root tree = { ~0 };
||||
|
||||
struct node { |
||||
uint32_t offset; |
||||
struct trp_node children; |
||||
}; |
||||
|
||||
/* Two memory pools: one for struct node, and another for strings */ |
||||
obj_pool_gen(node, struct node, 4096) |
||||
obj_pool_gen(string, char, 4096) |
||||
|
||||
static char *node_value(struct node *node) |
||||
{ |
||||
return node ? string_pointer(node->offset) : NULL; |
||||
} |
||||
|
||||
/*
 * Ordering function for the treap: compares the strings of two nodes
 * with strcmp().  Both nodes must be non-NULL, since node_value() would
 * otherwise hand NULL to strcmp().
 */
static int node_cmp(struct node *a, struct node *b)
{
	const char *lhs = node_value(a);
	const char *rhs = node_value(b);

	return strcmp(lhs, rhs);
}
||||
|
||||
/* Build a Treap from the node structure (a trp_node w/ offset) */ |
||||
trp_gen(static, tree_, struct node, children, node, node_cmp); |
||||
|
||||
/*
 * Look up the string for a key previously returned by pool_intern().
 * Returns NULL when node_pointer() finds no node for the key.
 */
const char *pool_fetch(uint32_t entry)
{
	struct node *found = node_pointer(entry);

	return node_value(found);
}
||||
|
||||
uint32_t pool_intern(const char *key) |
||||
{ |
||||
/* Canonicalize key */ |
||||
struct node *match = NULL, *node; |
||||
uint32_t key_len; |
||||
if (key == NULL) |
||||
return ~0; |
||||
key_len = strlen(key) + 1; |
||||
node = node_pointer(node_alloc(1)); |
||||
node->offset = string_alloc(key_len); |
||||
strcpy(node_value(node), key); |
||||
match = tree_search(&tree, node); |
||||
if (!match) { |
||||
tree_insert(&tree, node); |
||||
} else { |
||||
node_free(1); |
||||
string_free(key_len); |
||||
node = match; |
||||
} |
||||
return node_offset(node); |
||||
} |
||||
|
||||
/*
 * strtok_r() look-alike that returns the interned key of the next token
 * instead of a pointer to it.  The arguments are passed straight through
 * to strtok_r(), so str is modified in place.
 *
 * Returns ~0 once there are no more tokens.
 */
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
{
	char *word = strtok_r(str, delim, saveptr);

	if (!word)
		return ~0;
	return pool_intern(word);
}
||||
|
||||
/*
 * Write the strings named by seq[0..len-1] to stream, separated by
 * delim.  Printing stops at the first ~0 key, so a ~0-terminated
 * sequence shorter than len is printed without a trailing delimiter.
 */
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
{
	uint32_t pos = 0;

	while (pos < len && seq[pos] != (uint32_t)~0) {
		/* Separator goes in front of every element but the first. */
		if (pos)
			fputc(delim, stream);
		fputs(pool_fetch(seq[pos]), stream);
		pos++;
	}
}
||||
|
||||
/*
 * Split str on the characters in delim and store the interned key of
 * each token into seq, which has room for sz entries.
 *
 * The stored sequence is always terminated by ~0.  When the tokens and
 * the terminator fit, the number of tokens is returned.  Otherwise the
 * last slot is overwritten with ~0 and sz is returned.  A NULL str is
 * treated as having no tokens.
 *
 * Returns ~0 without touching seq when sz is 0.
 */
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
{
	char *state = NULL;
	uint32_t key = ~0;
	uint32_t filled = 0;

	if (!sz)
		return ~0;

	if (str)
		key = pool_tok_r(str, delim, &state);

	while (filled < sz) {
		seq[filled] = key;
		if (key == ~0)
			return filled;
		filled++;
		key = pool_tok_r(NULL, delim, &state);
	}

	/* Ran out of room: sacrifice the last slot for the terminator. */
	seq[sz - 1] = ~0;
	return sz;
}
||||
|
||||
/*
 * Reset both backing pools: first the node pool, then the string pool.
 */
void pool_reset(void)
{
	node_reset();
	string_reset();
}
@ -0,0 +1,11 @@
@@ -0,0 +1,11 @@
|
||||
#ifndef STRING_POOL_H_
#define STRING_POOL_H_

/* Add key to the pool (or find it there) and return its key; ~0 for NULL. */
uint32_t pool_intern(const char *key);

/* Return the string associated with a key. */
const char *pool_fetch(uint32_t entry);

/* Like strtok_r(), but returns the key of the next token; ~0 at the end. */
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);

/* Print up to len strings named by seq, separated by delim; stops at ~0. */
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);

/* Tokenize str into at most sz keys stored in seq, ~0-terminated. */
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);

/* Reset the storage behind the string pool. */
void pool_reset(void);

#endif
@ -0,0 +1,43 @@
@@ -0,0 +1,43 @@
|
||||
string_pool API |
||||
=============== |
||||
|
||||
The string_pool API provides facilities for replacing strings
with integer keys that can be more easily compared and stored.
The facilities are designed so that Git could be taught, without
too much trouble, to store the information needed for these keys
to remain valid across multiple executions.
||||
|
||||
Functions |
||||
--------- |
||||
|
||||
pool_intern:: |
||||
Include a string in the string pool and get its key. |
||||
If that string is already in the pool, retrieves its |
||||
existing key. |
||||
|
||||
pool_fetch:: |
||||
Retrieve the string associated with a given key.
||||
|
||||
pool_tok_r:: |
||||
Extract the key of the next token from a string. |
||||
Interface mimics strtok_r. |
||||
|
||||
pool_print_seq:: |
||||
Print a sequence of strings named by key to a file, using the |
||||
specified delimiter to separate them. |
||||
|
||||
If NULL (key ~0) appears in the sequence, the sequence ends |
||||
early. |
||||
|
||||
pool_tok_seq:: |
||||
Split a string into tokens, storing the keys of segments |
||||
into a caller-provided array. |
||||
|
||||
Unless sz is 0, the array will always be ~0-terminated.
If there is not enough room for all the tokens, the
array holds as many tokens as fit in the entries before
the terminating ~0. The return value is the index after the
last token, or sz if the tokens did not fit. If sz is 0,
nothing is stored and ~0 is returned.
||||
|
||||
pool_reset:: |
||||
Deallocate storage for the string pool. |
Loading…
Reference in new issue