Browse Source

string-list: multi-delimiter `string_list_split_in_place()`

Enhance `string_list_split_in_place()` to accept multiple characters as
delimiters instead of a single character.

Instead of using `strchr(3)` to locate the first occurrence of the given
delimiter character, `string_list_split_in_place()` now uses
`strpbrk(3)` to locate the first occurrence of any character in the
delimiting set.

When only a single delimiting character is provided, `strpbrk(3)` (which
is implemented with `strcspn(3)`) has equivalent performance to
`strchr(3)`. Modern `strcspn(3)` implementations treat an empty or
single-character delimiter set as a special case and fall back to
calling `strchrnul()`. Both glibc[1] and musl[2] implement `strcspn(3)`
this way.

This change is one step toward removing `strtok(3)` from the tree. Note
that `string_list_split_in_place()` is not a strict replacement for
`strtok()`, since it will happily turn sequential delimiter characters
into empty entries in the resulting string_list. For example:

    string_list_split_in_place(&xs, "foo:;:bar:;:baz", ":;", -1)

would yield a string list of:

    ["foo", "", "", "bar", "", "", "baz"]

Callers that wish to emulate the behavior of `strtok(3)` more directly
should call `string_list_remove_empty_items()` after splitting.

To avoid regressions for the new multi-character delimiter cases, update
t0063 in this patch as well.

[1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=string/strcspn.c;hb=glibc-2.37#l35
[2]: https://git.musl-libc.org/cgit/musl/tree/src/string/strcspn.c?h=v1.2.3#n11

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
main
Taylor Blau 2 years ago committed by Junio C Hamano
parent
commit
52acddf36c
  1. 4
      builtin/gc.c
  2. 2
      diff.c
  3. 2
      notes.c
  4. 2
      refs/packed-backend.c
  5. 4
      string-list.c
  6. 2
      string-list.h
  7. 4
      t/helper/test-string-list.c
  8. 51
      t/t0063-string-list.sh

4
builtin/gc.c

@ -1687,11 +1687,11 @@ static int get_schedule_cmd(const char **cmd, int *is_available) @@ -1687,11 +1687,11 @@ static int get_schedule_cmd(const char **cmd, int *is_available)
if (is_available)
*is_available = 0;

string_list_split_in_place(&list, testing, ',', -1);
string_list_split_in_place(&list, testing, ",", -1);
for_each_string_list_item(item, &list) {
struct string_list pair = STRING_LIST_INIT_NODUP;

if (string_list_split_in_place(&pair, item->string, ':', 2) != 2)
if (string_list_split_in_place(&pair, item->string, ":", 2) != 2)
continue;

if (!strcmp(*cmd, pair.items[0].string)) {

2
diff.c

@ -134,7 +134,7 @@ static int parse_dirstat_params(struct diff_options *options, const char *params @@ -134,7 +134,7 @@ static int parse_dirstat_params(struct diff_options *options, const char *params
int i;

if (*params_copy)
string_list_split_in_place(&params, params_copy, ',', -1);
string_list_split_in_place(&params, params_copy, ",", -1);
for (i = 0; i < params.nr; i++) {
const char *p = params.items[i].string;
if (!strcmp(p, "changes")) {

2
notes.c

@ -963,7 +963,7 @@ void string_list_add_refs_from_colon_sep(struct string_list *list, @@ -963,7 +963,7 @@ void string_list_add_refs_from_colon_sep(struct string_list *list,
char *globs_copy = xstrdup(globs);
int i;

string_list_split_in_place(&split, globs_copy, ':', -1);
string_list_split_in_place(&split, globs_copy, ":", -1);
string_list_remove_empty_items(&split, 0);

for (i = 0; i < split.nr; i++)

2
refs/packed-backend.c

@ -650,7 +650,7 @@ static struct snapshot *create_snapshot(struct packed_ref_store *refs) @@ -650,7 +650,7 @@ static struct snapshot *create_snapshot(struct packed_ref_store *refs)
snapshot->buf,
snapshot->eof - snapshot->buf);

string_list_split_in_place(&traits, p, ' ', -1);
string_list_split_in_place(&traits, p, " ", -1);

if (unsorted_string_list_has_string(&traits, "fully-peeled"))
snapshot->peeled = PEELED_FULLY;

4
string-list.c

@ -301,7 +301,7 @@ int string_list_split(struct string_list *list, const char *string, @@ -301,7 +301,7 @@ int string_list_split(struct string_list *list, const char *string,
}

int string_list_split_in_place(struct string_list *list, char *string,
int delim, int maxsplit)
const char *delim, int maxsplit)
{
int count = 0;
char *p = string, *end;
@ -315,7 +315,7 @@ int string_list_split_in_place(struct string_list *list, char *string, @@ -315,7 +315,7 @@ int string_list_split_in_place(struct string_list *list, char *string,
string_list_append(list, p);
return count;
}
end = strchr(p, delim);
end = strpbrk(p, delim);
if (end) {
*end = '\0';
string_list_append(list, p);

2
string-list.h

@ -270,5 +270,5 @@ int string_list_split(struct string_list *list, const char *string, @@ -270,5 +270,5 @@ int string_list_split(struct string_list *list, const char *string,
* list->strdup_strings must *not* be set.
*/
int string_list_split_in_place(struct string_list *list, char *string,
int delim, int maxsplit);
const char *delim, int maxsplit);
#endif /* STRING_LIST_H */

4
t/helper/test-string-list.c

@ -62,7 +62,7 @@ int cmd__string_list(int argc, const char **argv) @@ -62,7 +62,7 @@ int cmd__string_list(int argc, const char **argv)
struct string_list list = STRING_LIST_INIT_NODUP;
int i;
char *s = xstrdup(argv[2]);
int delim = *argv[3];
const char *delim = argv[3];
int maxsplit = atoi(argv[4]);

i = string_list_split_in_place(&list, s, delim, maxsplit);
@ -111,7 +111,7 @@ int cmd__string_list(int argc, const char **argv) @@ -111,7 +111,7 @@ int cmd__string_list(int argc, const char **argv)
*/
if (sb.len && sb.buf[sb.len - 1] == '\n')
strbuf_setlen(&sb, sb.len - 1);
string_list_split_in_place(&list, sb.buf, '\n', -1);
string_list_split_in_place(&list, sb.buf, "\n", -1);

string_list_sort(&list);


51
t/t0063-string-list.sh

@ -18,6 +18,14 @@ test_split () { @@ -18,6 +18,14 @@ test_split () {
"
}

# Register one test case for the in-place string-list splitter.
#
# Usage: test_split_in_place <string> <delims> <maxsplit> <<EOF ... EOF
#
#   $1 - string to split
#   $2 - delimiter argument handed to the helper
#   $3 - maxsplit argument handed to the helper
#
# The expected output is read from standard input (normally a here-doc)
# and saved to the file "expected". The test then runs
# `test-tool string-list split_in_place` with the three arguments,
# captures its output in "actual", and compares the two files.
#
# Note: $1..$3 are expanded when the test body string is built and are
# wrapped in single quotes inside it, so arguments containing a single
# quote would break the generated command.
test_split_in_place() {
cat >expected &&
test_expect_success "split (in place) $1 at $2, max $3" "
test-tool string-list split_in_place '$1' '$2' '$3' >actual &&
test_cmp expected actual
"
}

test_split "foo:bar:baz" ":" "-1" <<EOF
3
[0]: "foo"
@ -61,6 +69,49 @@ test_split ":" ":" "-1" <<EOF @@ -61,6 +69,49 @@ test_split ":" ":" "-1" <<EOF
[1]: ""
EOF

test_split_in_place "foo:;:bar:;:baz:;:" ":;" "-1" <<EOF
10
[0]: "foo"
[1]: ""
[2]: ""
[3]: "bar"
[4]: ""
[5]: ""
[6]: "baz"
[7]: ""
[8]: ""
[9]: ""
EOF

test_split_in_place "foo:;:bar:;:baz" ":;" "0" <<EOF
1
[0]: "foo:;:bar:;:baz"
EOF

test_split_in_place "foo:;:bar:;:baz" ":;" "1" <<EOF
2
[0]: "foo"
[1]: ";:bar:;:baz"
EOF

test_split_in_place "foo:;:bar:;:baz" ":;" "2" <<EOF
3
[0]: "foo"
[1]: ""
[2]: ":bar:;:baz"
EOF

test_split_in_place "foo:;:bar:;:" ":;" "-1" <<EOF
7
[0]: "foo"
[1]: ""
[2]: ""
[3]: "bar"
[4]: ""
[5]: ""
[6]: ""
EOF

test_expect_success "test filter_string_list" '
test "x-" = "x$(test-tool string-list filter - y)" &&
test "x-" = "x$(test-tool string-list filter no y)" &&

Loading…
Cancel
Save