Browse Source
Intern strings so they can be compared by address and stored without wasting space. This library uses the macros in the obj_pool.h and trp.h to create a memory pool for strings and expose an API for handling them. [rr: added API docs] [jn: with some API simplifications, new documentation and tests] Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>maint
David Barr
15 years ago
committed by
Junio C Hamano
7 changed files with 210 additions and 3 deletions
@ -0,0 +1,31 @@ |
|||||||
|
/* |
||||||
|
* test-string-pool.c: code to exercise the svn importer's string pool |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "git-compat-util.h" |
||||||
|
#include "vcs-svn/string_pool.h" |
||||||
|
|
||||||
|
int main(int argc, char *argv[]) |
||||||
|
{ |
||||||
|
const uint32_t unequal = pool_intern("does not equal"); |
||||||
|
const uint32_t equal = pool_intern("equals"); |
||||||
|
uint32_t buf[3]; |
||||||
|
uint32_t n; |
||||||
|
|
||||||
|
if (argc != 2) |
||||||
|
usage("test-string-pool <string>,<string>"); |
||||||
|
|
||||||
|
n = pool_tok_seq(3, buf, ",-", argv[1]); |
||||||
|
if (n >= 3) |
||||||
|
die("too many strings"); |
||||||
|
if (n <= 1) |
||||||
|
die("too few strings"); |
||||||
|
|
||||||
|
buf[2] = buf[1]; |
||||||
|
buf[1] = (buf[0] == buf[2]) ? equal : unequal; |
||||||
|
pool_print_seq(3, buf, ' ', stdout); |
||||||
|
fputc('\n', stdout); |
||||||
|
|
||||||
|
pool_reset(); |
||||||
|
return 0; |
||||||
|
} |
@ -0,0 +1,102 @@ |
|||||||
|
/* |
||||||
|
* Licensed under a two-clause BSD-style license. |
||||||
|
* See LICENSE for details. |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "git-compat-util.h" |
||||||
|
#include "trp.h" |
||||||
|
#include "obj_pool.h" |
||||||
|
#include "string_pool.h" |
||||||
|
|
||||||
|
static struct trp_root tree = { ~0 }; |
||||||
|
|
||||||
|
struct node { |
||||||
|
uint32_t offset; |
||||||
|
struct trp_node children; |
||||||
|
}; |
||||||
|
|
||||||
|
/* Two memory pools: one for struct node, and another for strings */ |
||||||
|
obj_pool_gen(node, struct node, 4096) |
||||||
|
obj_pool_gen(string, char, 4096) |
||||||
|
|
||||||
|
static char *node_value(struct node *node) |
||||||
|
{ |
||||||
|
return node ? string_pointer(node->offset) : NULL; |
||||||
|
} |
||||||
|
|
||||||
|
static int node_cmp(struct node *a, struct node *b) |
||||||
|
{ |
||||||
|
return strcmp(node_value(a), node_value(b)); |
||||||
|
} |
||||||
|
|
||||||
|
/* Build a Treap from the node structure (a trp_node w/ offset) */ |
||||||
|
trp_gen(static, tree_, struct node, children, node, node_cmp); |
||||||
|
|
||||||
|
const char *pool_fetch(uint32_t entry) |
||||||
|
{ |
||||||
|
return node_value(node_pointer(entry)); |
||||||
|
} |
||||||
|
|
||||||
|
uint32_t pool_intern(const char *key) |
||||||
|
{ |
||||||
|
/* Canonicalize key */ |
||||||
|
struct node *match = NULL, *node; |
||||||
|
uint32_t key_len; |
||||||
|
if (key == NULL) |
||||||
|
return ~0; |
||||||
|
key_len = strlen(key) + 1; |
||||||
|
node = node_pointer(node_alloc(1)); |
||||||
|
node->offset = string_alloc(key_len); |
||||||
|
strcpy(node_value(node), key); |
||||||
|
match = tree_search(&tree, node); |
||||||
|
if (!match) { |
||||||
|
tree_insert(&tree, node); |
||||||
|
} else { |
||||||
|
node_free(1); |
||||||
|
string_free(key_len); |
||||||
|
node = match; |
||||||
|
} |
||||||
|
return node_offset(node); |
||||||
|
} |
||||||
|
|
||||||
|
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) |
||||||
|
{ |
||||||
|
char *token = strtok_r(str, delim, saveptr); |
||||||
|
return token ? pool_intern(token) : ~0; |
||||||
|
} |
||||||
|
|
||||||
|
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) |
||||||
|
{ |
||||||
|
uint32_t i; |
||||||
|
for (i = 0; i < len && ~seq[i]; i++) { |
||||||
|
fputs(pool_fetch(seq[i]), stream); |
||||||
|
if (i < len - 1 && ~seq[i + 1]) |
||||||
|
fputc(delim, stream); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) |
||||||
|
{ |
||||||
|
char *context = NULL; |
||||||
|
uint32_t token = ~0; |
||||||
|
uint32_t length; |
||||||
|
|
||||||
|
if (sz == 0) |
||||||
|
return ~0; |
||||||
|
if (str) |
||||||
|
token = pool_tok_r(str, delim, &context); |
||||||
|
for (length = 0; length < sz; length++) { |
||||||
|
seq[length] = token; |
||||||
|
if (token == ~0) |
||||||
|
return length; |
||||||
|
token = pool_tok_r(NULL, delim, &context); |
||||||
|
} |
||||||
|
seq[sz - 1] = ~0; |
||||||
|
return sz; |
||||||
|
} |
||||||
|
|
||||||
|
void pool_reset(void) |
||||||
|
{ |
||||||
|
node_reset(); |
||||||
|
string_reset(); |
||||||
|
} |
@ -0,0 +1,11 @@ |
|||||||
|
#ifndef STRING_POOL_H_ |
||||||
|
#define STRING_POOL_H_ |
||||||
|
|
||||||
|
uint32_t pool_intern(const char *key); |
||||||
|
const char *pool_fetch(uint32_t entry); |
||||||
|
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); |
||||||
|
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); |
||||||
|
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); |
||||||
|
void pool_reset(void); |
||||||
|
|
||||||
|
#endif |
@ -0,0 +1,43 @@ |
|||||||
|
string_pool API |
||||||
|
=============== |
||||||
|
|
||||||
|
The string_pool API provides facilities for replacing strings |
||||||
|
with integer keys that can be more easily compared and stored. |
||||||
|
The facilities are designed so that one could teach Git without |
||||||
|
too much trouble to store the information needed for these keys to |
||||||
|
remain valid over multiple executions. |
||||||
|
|
||||||
|
Functions |
||||||
|
--------- |
||||||
|
|
||||||
|
pool_intern:: |
||||||
|
Include a string in the string pool and get its key. |
||||||
|
If that string is already in the pool, retrieves its |
||||||
|
existing key. |
||||||
|
|
||||||
|
pool_fetch:: |
||||||
|
Retrieve the string associated to a given key. |
||||||
|
|
||||||
|
pool_tok_r:: |
||||||
|
Extract the key of the next token from a string. |
||||||
|
Interface mimics strtok_r. |
||||||
|
|
||||||
|
pool_print_seq:: |
||||||
|
Print a sequence of strings named by key to a file, using the |
||||||
|
specified delimiter to separate them. |
||||||
|
|
||||||
|
If NULL (key ~0) appears in the sequence, the sequence ends |
||||||
|
early. |
||||||
|
|
||||||
|
pool_tok_seq:: |
||||||
|
Split a string into tokens, storing the keys of segments |
||||||
|
into a caller-provided array. |
||||||
|
|
||||||
|
Unless sz is 0, the array will always be ~0-terminated. |
||||||
|
If there is not enough room for all the tokens, the |
||||||
|
array holds as many tokens as fit in the entries before |
||||||
|
the terminating ~0. Return value is the index after the |
||||||
|
last token, or sz if the tokens did not fit. |
||||||
|
|
||||||
|
pool_reset:: |
||||||
|
Deallocate storage for the string pool. |
Loading…
Reference in new issue