You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
155 lines
5.1 KiB
155 lines
5.1 KiB
Upstream commit: |
|
|
|
commit 7e2f0d2d77e4bc273fe00f99d970605d8e38d4d6 |
|
Author: Andreas Schwab <schwab@suse.de> |
|
Date: Mon Feb 4 10:16:33 2013 +0100 |
|
|
|
Fix handling of collating symbols in regexps |
|
|
|
From c1b97d6d896b1f22fdf5d28471ef7859ec840a57 Mon Sep 17 00:00:00 2001 |
|
From: Andreas Schwab <schwab@redhat.com> |
|
Date: Wed, 1 Sep 2010 17:26:15 +0200 |
|
Subject: [PATCH] Fix handling of collating symbols in regexps |
|
|
|
[BZ #11561] |
|
* posix/regcomp.c (parse_bracket_exp): When looking up collating |
|
elements compare against the byte sequence of it, not its name. |
|
|
|
--- |
|
ChangeLog | 4 +++ |
|
posix/regcomp.c | 72 ++++++++++++++++++++---------------------------------- |
|
2 files changed, 31 insertions(+), 45 deletions(-) |
|
|
|
--- glibc-2.17-c758a686/posix/regcomp.c |
|
+++ glibc-2.17-c758a686/posix/regcomp.c |
|
@@ -2772,40 +2772,29 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, |
|
|
|
/* Local function for parse_bracket_exp used in _LIBC environement. |
|
Seek the collating symbol entry correspondings to NAME. |
|
- Return the index of the symbol in the SYMB_TABLE. */ |
|
+ Return the index of the symbol in the SYMB_TABLE, |
|
+ or -1 if not found. */ |
|
|
|
auto inline int32_t |
|
__attribute ((always_inline)) |
|
- seek_collating_symbol_entry (name, name_len) |
|
- const unsigned char *name; |
|
- size_t name_len; |
|
+ seek_collating_symbol_entry (const unsigned char *name, size_t name_len) |
|
{ |
|
- int32_t hash = elem_hash ((const char *) name, name_len); |
|
- int32_t elem = hash % table_size; |
|
- if (symb_table[2 * elem] != 0) |
|
- { |
|
- int32_t second = hash % (table_size - 2) + 1; |
|
- |
|
- do |
|
- { |
|
- /* First compare the hashing value. */ |
|
- if (symb_table[2 * elem] == hash |
|
- /* Compare the length of the name. */ |
|
- && name_len == extra[symb_table[2 * elem + 1]] |
|
- /* Compare the name. */ |
|
- && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], |
|
- name_len) == 0) |
|
- { |
|
- /* Yep, this is the entry. */ |
|
- break; |
|
- } |
|
+ int32_t elem; |
|
|
|
- /* Next entry. */ |
|
- elem += second; |
|
- } |
|
- while (symb_table[2 * elem] != 0); |
|
- } |
|
- return elem; |
|
+ for (elem = 0; elem < table_size; elem++) |
|
+ if (symb_table[2 * elem] != 0) |
|
+ { |
|
+ int32_t idx = symb_table[2 * elem + 1]; |
|
+ /* Skip the name of collating element name. */ |
|
+ idx += 1 + extra[idx]; |
|
+ if (/* Compare the length of the name. */ |
|
+ name_len == extra[idx] |
|
+ /* Compare the name. */ |
|
+ && memcmp (name, &extra[idx + 1], name_len) == 0) |
|
+ /* Yep, this is the entry. */ |
|
+ return elem; |
|
+ } |
|
+ return -1; |
|
} |
|
|
|
/* Local function for parse_bracket_exp used in _LIBC environment. |
|
@@ -2814,8 +2803,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, |
|
|
|
auto inline unsigned int |
|
__attribute ((always_inline)) |
|
- lookup_collation_sequence_value (br_elem) |
|
- bracket_elem_t *br_elem; |
|
+ lookup_collation_sequence_value (bracket_elem_t *br_elem) |
|
{ |
|
if (br_elem->type == SB_CHAR) |
|
{ |
|
@@ -2843,7 +2831,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, |
|
int32_t elem, idx; |
|
elem = seek_collating_symbol_entry (br_elem->opr.name, |
|
sym_name_len); |
|
- if (symb_table[2 * elem] != 0) |
|
+ if (elem != -1) |
|
{ |
|
/* We found the entry. */ |
|
idx = symb_table[2 * elem + 1]; |
|
@@ -2861,7 +2849,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, |
|
/* Return the collation sequence value. */ |
|
return *(unsigned int *) (extra + idx); |
|
} |
|
- else if (symb_table[2 * elem] == 0 && sym_name_len == 1) |
|
+ else if (sym_name_len == 1) |
|
{ |
|
/* No valid character. Match it as a single byte |
|
character. */ |
|
@@ -2883,11 +2871,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, |
|
|
|
auto inline reg_errcode_t |
|
__attribute ((always_inline)) |
|
- build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) |
|
- re_charset_t *mbcset; |
|
- int *range_alloc; |
|
- bitset_t sbcset; |
|
- bracket_elem_t *start_elem, *end_elem; |
|
+ build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, |
|
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem) |
|
{ |
|
unsigned int ch; |
|
uint32_t start_collseq; |
|
@@ -2966,25 +2951,22 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, |
|
|
|
auto inline reg_errcode_t |
|
__attribute ((always_inline)) |
|
- build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) |
|
- re_charset_t *mbcset; |
|
- int *coll_sym_alloc; |
|
- bitset_t sbcset; |
|
- const unsigned char *name; |
|
+ build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, |
|
+ int *coll_sym_alloc, const unsigned char *name) |
|
{ |
|
int32_t elem, idx; |
|
size_t name_len = strlen ((const char *) name); |
|
if (nrules != 0) |
|
{ |
|
elem = seek_collating_symbol_entry (name, name_len); |
|
- if (symb_table[2 * elem] != 0) |
|
+ if (elem != -1) |
|
{ |
|
/* We found the entry. */ |
|
idx = symb_table[2 * elem + 1]; |
|
/* Skip the name of collating element name. */ |
|
idx += 1 + extra[idx]; |
|
} |
|
- else if (symb_table[2 * elem] == 0 && name_len == 1) |
|
+ else if (name_len == 1) |
|
{ |
|
/* No valid character, treat it as a normal |
|
character. */
|
|
|