Browse Source

Add string-specific memory pool

Intern strings so they can be compared by address and stored without
wasting space.

This library uses the macros in the obj_pool.h and trp.h to create a
memory pool for strings and expose an API for handling them.

[rr: added API docs]
[jn: with some API simplifications, new documentation and tests]

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
maint
David Barr 14 years ago committed by Junio C Hamano
parent
commit
1d73b52f5b
  1. 1
      .gitignore
  2. 9
      Makefile
  3. 16
      t/t0080-vcs-svn.sh
  4. 31
      test-string-pool.c
  5. 102
      vcs-svn/string_pool.c
  6. 11
      vcs-svn/string_pool.h
  7. 43
      vcs-svn/string_pool.txt

1
.gitignore vendored

@ -173,6 +173,7 @@ @@ -173,6 +173,7 @@
/test-run-command
/test-sha1
/test-sigchain
/test-string-pool
/test-treap
/common-cmds.h
*.tar.gz

9
Makefile

@ -415,6 +415,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils @@ -415,6 +415,7 @@ TEST_PROGRAMS_NEED_X += test-path-utils
TEST_PROGRAMS_NEED_X += test-run-command
TEST_PROGRAMS_NEED_X += test-sha1
TEST_PROGRAMS_NEED_X += test-sigchain
TEST_PROGRAMS_NEED_X += test-string-pool
TEST_PROGRAMS_NEED_X += test-treap
TEST_PROGRAMS_NEED_X += test-index-version

@ -1742,7 +1743,7 @@ ifndef NO_CURL @@ -1742,7 +1743,7 @@ ifndef NO_CURL
endif
XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \
xdiff/xmerge.o xdiff/xpatience.o
VCSSVN_OBJS =
VCSSVN_OBJS = vcs-svn/string_pool.o
OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS)

dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d)
@ -1867,7 +1868,7 @@ xdiff-interface.o $(XDIFF_OBJS): \ @@ -1867,7 +1868,7 @@ xdiff-interface.o $(XDIFF_OBJS): \
xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h

$(VCSSVN_OBJS): \
vcs-svn/obj_pool.h vcs-svn/trp.h
vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h
endif

exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \
@ -2018,10 +2019,12 @@ test-delta$X: diff-delta.o patch-delta.o @@ -2018,10 +2019,12 @@ test-delta$X: diff-delta.o patch-delta.o

test-parse-options$X: parse-options.o

test-string-pool$X: vcs-svn/lib.a

.PRECIOUS: $(TEST_OBJS)

test-%$X: test-%.o $(GITLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS)

check-sha1:: test-sha1$X
./test-sha1.sh

16
t/t0080-vcs-svn.sh

@ -76,6 +76,22 @@ test_expect_success 'obj pool: high-water mark' ' @@ -76,6 +76,22 @@ test_expect_success 'obj pool: high-water mark' '
test_cmp expected actual
'

test_expect_success 'string pool' '
echo a does not equal b >expected.differ &&
echo a equals a >expected.match &&
echo equals equals equals >expected.matchmore &&

test-string-pool "a,--b" >actual.differ &&
test-string-pool "a,a" >actual.match &&
test-string-pool "equals-equals" >actual.matchmore &&
test_must_fail test-string-pool a,a,a &&
test_must_fail test-string-pool a &&

test_cmp expected.differ actual.differ &&
test_cmp expected.match actual.match &&
test_cmp expected.matchmore actual.matchmore
'

test_expect_success 'treap sort' '
cat <<-\EOF >unsorted &&
68

31
test-string-pool.c

@ -0,0 +1,31 @@ @@ -0,0 +1,31 @@
/*
* test-string-pool.c: code to exercise the svn importer's string pool
*/

#include "git-compat-util.h"
#include "vcs-svn/string_pool.h"

int main(int argc, char *argv[])
{
const uint32_t unequal = pool_intern("does not equal");
const uint32_t equal = pool_intern("equals");
uint32_t buf[3];
uint32_t n;

if (argc != 2)
usage("test-string-pool <string>,<string>");

n = pool_tok_seq(3, buf, ",-", argv[1]);
if (n >= 3)
die("too many strings");
if (n <= 1)
die("too few strings");

buf[2] = buf[1];
buf[1] = (buf[0] == buf[2]) ? equal : unequal;
pool_print_seq(3, buf, ' ', stdout);
fputc('\n', stdout);

pool_reset();
return 0;
}

102
vcs-svn/string_pool.c

@ -0,0 +1,102 @@ @@ -0,0 +1,102 @@
/*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/

#include "git-compat-util.h"
#include "trp.h"
#include "obj_pool.h"
#include "string_pool.h"

static struct trp_root tree = { ~0 };

struct node {
uint32_t offset;
struct trp_node children;
};

/* Two memory pools: one for struct node, and another for strings */
obj_pool_gen(node, struct node, 4096)
obj_pool_gen(string, char, 4096)

static char *node_value(struct node *node)
{
return node ? string_pointer(node->offset) : NULL;
}

static int node_cmp(struct node *a, struct node *b)
{
return strcmp(node_value(a), node_value(b));
}

/* Build a Treap from the node structure (a trp_node w/ offset) */
trp_gen(static, tree_, struct node, children, node, node_cmp);

const char *pool_fetch(uint32_t entry)
{
return node_value(node_pointer(entry));
}

uint32_t pool_intern(const char *key)
{
/* Canonicalize key */
struct node *match = NULL, *node;
uint32_t key_len;
if (key == NULL)
return ~0;
key_len = strlen(key) + 1;
node = node_pointer(node_alloc(1));
node->offset = string_alloc(key_len);
strcpy(node_value(node), key);
match = tree_search(&tree, node);
if (!match) {
tree_insert(&tree, node);
} else {
node_free(1);
string_free(key_len);
node = match;
}
return node_offset(node);
}

uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
{
char *token = strtok_r(str, delim, saveptr);
return token ? pool_intern(token) : ~0;
}

void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
{
uint32_t i;
for (i = 0; i < len && ~seq[i]; i++) {
fputs(pool_fetch(seq[i]), stream);
if (i < len - 1 && ~seq[i + 1])
fputc(delim, stream);
}
}

uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str)
{
char *context = NULL;
uint32_t token = ~0;
uint32_t length;

if (sz == 0)
return ~0;
if (str)
token = pool_tok_r(str, delim, &context);
for (length = 0; length < sz; length++) {
seq[length] = token;
if (token == ~0)
return length;
token = pool_tok_r(NULL, delim, &context);
}
seq[sz - 1] = ~0;
return sz;
}

void pool_reset(void)
{
node_reset();
string_reset();
}

11
vcs-svn/string_pool.h

@ -0,0 +1,11 @@ @@ -0,0 +1,11 @@
#ifndef STRING_POOL_H_
#define STRING_POOL_H_

uint32_t pool_intern(const char *key);
const char *pool_fetch(uint32_t entry);
uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
void pool_reset(void);

#endif

43
vcs-svn/string_pool.txt

@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
string_pool API
===============

The string_pool API provides facilities for replacing strings
with integer keys that can be more easily compared and stored.
The facilities are designed so that one could teach Git without
too much trouble to store the information needed for these keys to
remain valid over multiple executions.

Functions
---------

pool_intern::
Include a string in the string pool and get its key.
If that string is already in the pool, retrieves its
existing key.

pool_fetch::
Retrieve the string associated to a given key.

pool_tok_r::
Extract the key of the next token from a string.
Interface mimics strtok_r.

pool_print_seq::
Print a sequence of strings named by key to a file, using the
specified delimiter to separate them.

If NULL (key ~0) appears in the sequence, the sequence ends
early.

pool_tok_seq::
Split a string into tokens, storing the keys of segments
into a caller-provided array.

Unless sz is 0, the array will always be ~0-terminated.
If there is not enough room for all the tokens, the
array holds as many tokens as fit in the entries before
the terminating ~0. Return value is the index after the
last token, or sz if the tokens did not fit.

pool_reset::
Deallocate storage for the string pool.
Loading…
Cancel
Save