Toshaan Bharvani
2 years ago
34 changed files with 423 additions and 86 deletions
@ -0,0 +1,33 @@
@@ -0,0 +1,33 @@
|
||||
2022-04-21 Jakub Jelinek <jakub@redhat.com> |
||||
|
||||
PR target/105331 |
||||
* config/i386/i386.c (ix86_gimplify_va_arg): Mark va_arg_tmp |
||||
temporary TREE_ADDRESSABLE before trying to gimplify ADDR_EXPR |
||||
of it. |
||||
|
||||
* gcc.dg/pr105331.c: New test. |
||||
|
||||
--- gcc/config/i386/i386.c.jj 2022-04-12 09:20:07.566662842 +0200 |
||||
+++ gcc/config/i386/i386.c 2022-04-21 12:03:32.201951522 +0200 |
||||
@@ -4891,6 +4891,7 @@ ix86_gimplify_va_arg (tree valist, tree |
||||
{ |
||||
int i, prev_size = 0; |
||||
tree temp = create_tmp_var (type, "va_arg_tmp"); |
||||
+ TREE_ADDRESSABLE (temp) = 1; |
||||
|
||||
/* addr = &temp; */ |
||||
t = build1 (ADDR_EXPR, build_pointer_type (type), temp); |
||||
--- gcc/testsuite/gcc.dg/pr105331.c.jj 2022-04-21 12:09:34.398906718 +0200 |
||||
+++ gcc/testsuite/gcc.dg/pr105331.c 2022-04-21 12:09:07.304283903 +0200 |
||||
@@ -0,0 +1,11 @@ |
||||
+/* PR target/105331 */ |
||||
+/* { dg-do compile } */ |
||||
+/* { dg-options "-O -Wuninitialized" } */ |
||||
+ |
||||
+#include <stdarg.h> |
||||
+ |
||||
+int |
||||
+foo (va_list *va) |
||||
+{ |
||||
+ return va_arg (*va, double _Complex); /* { dg-bogus "may be used uninitialized" } */ |
||||
+} |
@ -0,0 +1,388 @@
@@ -0,0 +1,388 @@
|
||||
commit c725028a8bb9478ec84332641147ad12b9236922 |
||||
Author: Jonathan Wakely <jwakely@redhat.com> |
||||
Date: Tue Dec 14 14:32:35 2021 +0000 |
||||
|
||||
libstdc++: Fix handling of invalid ranges in std::regex [PR102447] |
||||
|
||||
std::regex currently allows invalid bracket ranges such as [\w-a] which |
||||
are only allowed by ECMAScript when in web browser compatibility mode. |
||||
It should be an error, because the start of the range is a character |
||||
class, not a single character. The current implementation of |
||||
_Compiler::_M_expression_term does not provide a way to reject this, |
||||
because we only remember a previous character, not whether we just |
||||
processed a character class (or collating symbol etc.) |
||||
|
||||
This patch replaces the pair<bool, CharT> used to emulate |
||||
optional<CharT> with a custom class closer to pair<tribool,CharT>. That |
||||
allows us to track three states, so that we can tell when we've just |
||||
seen a character class. |
||||
|
||||
With this additional state the code in _M_expression_term for processing |
||||
the _S_token_bracket_dash can be improved to correctly reject the [\w-a] |
||||
case, without regressing for valid cases such as [\w-] and [----]. |
||||
|
||||
libstdc++-v3/ChangeLog: |
||||
|
||||
PR libstdc++/102447 |
||||
* include/bits/regex_compiler.h (_Compiler::_BracketState): New |
||||
class. |
||||
(_Compiler::_BrackeyMatcher): New alias template. |
||||
(_Compiler::_M_expression_term): Change pair<bool, CharT> |
||||
parameter to _BracketState. Process first character for |
||||
ECMAScript syntax as well as POSIX. |
||||
* include/bits/regex_compiler.tcc |
||||
(_Compiler::_M_insert_bracket_matcher): Pass _BracketState. |
||||
(_Compiler::_M_expression_term): Use _BracketState to store |
||||
state between calls. Improve handling of dashes in ranges. |
||||
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc: |
||||
Add more tests for ranges containing dashes. Check invalid |
||||
ranges with character class at the beginning. |
||||
|
||||
(cherry picked from commit 7ce3c230edf6e498e125c805a6dd313bf87dc439) |
||||
|
||||
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h |
||||
index f224fcb06e0..aa19df2bf9a 100644 |
||||
--- a/libstdc++-v3/include/bits/regex_compiler.h |
||||
+++ b/libstdc++-v3/include/bits/regex_compiler.h |
||||
@@ -122,13 +122,45 @@ namespace __detail |
||||
void |
||||
_M_insert_bracket_matcher(bool __neg); |
||||
|
||||
- // Returns true if successfully matched one term and should continue. |
||||
+ // Cache of the last atom seen in a bracketed range expression. |
||||
+ struct _BracketState |
||||
+ { |
||||
+ enum class _Type : char { _None, _Char, _Class } _M_type = _Type::_None; |
||||
+ _CharT _M_char; |
||||
+ |
||||
+ void |
||||
+ set(_CharT __c) noexcept { _M_type = _Type::_Char; _M_char = __c; } |
||||
+ |
||||
+ _GLIBCXX_NODISCARD _CharT |
||||
+ get() const noexcept { return _M_char; } |
||||
+ |
||||
+ void |
||||
+ reset(_Type __t = _Type::_None) noexcept { _M_type = __t; } |
||||
+ |
||||
+ explicit operator bool() const noexcept |
||||
+ { return _M_type != _Type::_None; } |
||||
+ |
||||
+ // Previous token was a single character. |
||||
+ _GLIBCXX_NODISCARD bool |
||||
+ _M_is_char() const noexcept { return _M_type == _Type::_Char; } |
||||
+ |
||||
+ // Previous token was a character class, equivalent class, |
||||
+ // collating symbol etc. |
||||
+ _GLIBCXX_NODISCARD bool |
||||
+ _M_is_class() const noexcept { return _M_type == _Type::_Class; } |
||||
+ }; |
||||
+ |
||||
+ template<bool __icase, bool __collate> |
||||
+ using _BracketMatcher |
||||
+ = std::__detail::_BracketMatcher<_TraitsT, __icase, __collate>; |
||||
+ |
||||
+ // Returns true if successfully parsed one term and should continue |
||||
+ // compiling a bracket expression. |
||||
// Returns false if the compiler should move on. |
||||
template<bool __icase, bool __collate> |
||||
bool |
||||
- _M_expression_term(pair<bool, _CharT>& __last_char, |
||||
- _BracketMatcher<_TraitsT, __icase, __collate>& |
||||
- __matcher); |
||||
+ _M_expression_term(_BracketState& __last_char, |
||||
+ _BracketMatcher<__icase, __collate>& __matcher); |
||||
|
||||
int |
||||
_M_cur_int_value(int __radix); |
||||
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc |
||||
index ea07bc2428e..7769a9e63a3 100644 |
||||
--- a/libstdc++-v3/include/bits/regex_compiler.tcc |
||||
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc |
||||
@@ -403,7 +403,7 @@ namespace __detail |
||||
_M_insert_character_class_matcher() |
||||
{ |
||||
__glibcxx_assert(_M_value.size() == 1); |
||||
- _BracketMatcher<_TraitsT, __icase, __collate> __matcher |
||||
+ _BracketMatcher<__icase, __collate> __matcher |
||||
(_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits); |
||||
__matcher._M_add_character_class(_M_value, false); |
||||
__matcher._M_ready(); |
||||
@@ -424,25 +424,17 @@ namespace __detail |
||||
_Compiler<_TraitsT>:: |
||||
_M_insert_bracket_matcher(bool __neg) |
||||
{ |
||||
- _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits); |
||||
- pair<bool, _CharT> __last_char; // Optional<_CharT> |
||||
- __last_char.first = false; |
||||
- if (!(_M_flags & regex_constants::ECMAScript)) |
||||
- { |
||||
- if (_M_try_char()) |
||||
- { |
||||
- __last_char.first = true; |
||||
- __last_char.second = _M_value[0]; |
||||
- } |
||||
- else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) |
||||
- { |
||||
- __last_char.first = true; |
||||
- __last_char.second = '-'; |
||||
- } |
||||
- } |
||||
- while (_M_expression_term(__last_char, __matcher)); |
||||
- if (__last_char.first) |
||||
- __matcher._M_add_char(__last_char.second); |
||||
+ _BracketMatcher<__icase, __collate> __matcher(__neg, _M_traits); |
||||
+ _BracketState __last_char; |
||||
+ if (_M_try_char()) |
||||
+ __last_char.set(_M_value[0]); |
||||
+ else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) |
||||
+ // Dash as first character is a normal character. |
||||
+ __last_char.set('-'); |
||||
+ while (_M_expression_term(__last_char, __matcher)) |
||||
+ ; |
||||
+ if (__last_char._M_is_char()) |
||||
+ __matcher._M_add_char(__last_char.get()); |
||||
__matcher._M_ready(); |
||||
_M_stack.push(_StateSeqT( |
||||
*_M_nfa, |
||||
@@ -447,27 +438,27 @@ namespace __detail |
||||
template<bool __icase, bool __collate> |
||||
bool |
||||
_Compiler<_TraitsT>:: |
||||
- _M_expression_term(pair<bool, _CharT>& __last_char, |
||||
- _BracketMatcher<_TraitsT, __icase, __collate>& __matcher) |
||||
+ _M_expression_term(_BracketState& __last_char, |
||||
+ _BracketMatcher<__icase, __collate>& __matcher) |
||||
{ |
||||
if (_M_match_token(_ScannerT::_S_token_bracket_end)) |
||||
return false; |
||||
|
||||
+ // Add any previously cached char into the matcher and update cache. |
||||
const auto __push_char = [&](_CharT __ch) |
||||
{ |
||||
- if (__last_char.first) |
||||
- __matcher._M_add_char(__last_char.second); |
||||
- else |
||||
- __last_char.first = true; |
||||
- __last_char.second = __ch; |
||||
+ if (__last_char._M_is_char()) |
||||
+ __matcher._M_add_char(__last_char.get()); |
||||
+ __last_char.set(__ch); |
||||
}; |
||||
- const auto __flush = [&] |
||||
+ // Add any previously cached char into the matcher and update cache. |
||||
+ const auto __push_class = [&] |
||||
{ |
||||
- if (__last_char.first) |
||||
- { |
||||
- __matcher._M_add_char(__last_char.second); |
||||
- __last_char.first = false; |
||||
- } |
||||
+ if (__last_char._M_is_char()) |
||||
+ __matcher._M_add_char(__last_char.get()); |
||||
+ // We don't cache anything here, just record that the last thing |
||||
+ // processed was a character class (or similar). |
||||
+ __last_char.reset(_BracketState::_Type::_Class); |
||||
}; |
||||
|
||||
if (_M_match_token(_ScannerT::_S_token_collsymbol)) |
||||
@@ -476,16 +467,16 @@ namespace __detail |
||||
if (__symbol.size() == 1) |
||||
__push_char(__symbol[0]); |
||||
else |
||||
- __flush(); |
||||
+ __push_class(); |
||||
} |
||||
else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) |
||||
{ |
||||
- __flush(); |
||||
+ __push_class(); |
||||
__matcher._M_add_equivalence_class(_M_value); |
||||
} |
||||
else if (_M_match_token(_ScannerT::_S_token_char_class_name)) |
||||
{ |
||||
- __flush(); |
||||
+ __push_class(); |
||||
__matcher._M_add_character_class(_M_value, false); |
||||
} |
||||
else if (_M_try_char()) |
||||
@@ -502,49 +493,50 @@ namespace __detail |
||||
// It turns out that no one reads BNFs ;) |
||||
else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) |
||||
{ |
||||
- if (!__last_char.first) |
||||
+ if (_M_match_token(_ScannerT::_S_token_bracket_end)) |
||||
{ |
||||
- if (!(_M_flags & regex_constants::ECMAScript)) |
||||
- { |
||||
- if (_M_match_token(_ScannerT::_S_token_bracket_end)) |
||||
- { |
||||
- __push_char('-'); |
||||
- return false; |
||||
- } |
||||
- __throw_regex_error( |
||||
- regex_constants::error_range, |
||||
- "Unexpected dash in bracket expression. For POSIX syntax, " |
||||
- "a dash is not treated literally only when it is at " |
||||
- "beginning or end."); |
||||
- } |
||||
+ // For "-]" the dash is a literal character. |
||||
__push_char('-'); |
||||
+ return false; |
||||
} |
||||
- else |
||||
+ else if (__last_char._M_is_class()) |
||||
+ { |
||||
+ // "\\w-" is invalid, start of range must be a single char. |
||||
+ __throw_regex_error(regex_constants::error_range, |
||||
+ "Invalid start of range in bracket expression."); |
||||
+ } |
||||
+ else if (__last_char._M_is_char()) |
||||
{ |
||||
if (_M_try_char()) |
||||
{ |
||||
- __matcher._M_make_range(__last_char.second, _M_value[0]); |
||||
- __last_char.first = false; |
||||
+ // "x-y" |
||||
+ __matcher._M_make_range(__last_char.get(), _M_value[0]); |
||||
+ __last_char.reset(); |
||||
} |
||||
else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) |
||||
{ |
||||
- __matcher._M_make_range(__last_char.second, '-'); |
||||
- __last_char.first = false; |
||||
+ // "x--" |
||||
+ __matcher._M_make_range(__last_char.get(), '-'); |
||||
+ __last_char.reset(); |
||||
} |
||||
else |
||||
- { |
||||
- if (_M_scanner._M_get_token() |
||||
- != _ScannerT::_S_token_bracket_end) |
||||
- __throw_regex_error( |
||||
- regex_constants::error_range, |
||||
- "Character is expected after a dash."); |
||||
- __push_char('-'); |
||||
- } |
||||
+ __throw_regex_error(regex_constants::error_range, |
||||
+ "Invalid end of range in bracket expression."); |
||||
} |
||||
+ else if (_M_flags & regex_constants::ECMAScript) |
||||
+ { |
||||
+ // A dash that is not part of an existing range. Might be the |
||||
+ // start of a new range, or might just be a literal '-' char. |
||||
+ // Only ECMAScript allows that in the middle of a bracket expr. |
||||
+ __push_char('-'); |
||||
+ } |
||||
+ else |
||||
+ __throw_regex_error(regex_constants::error_range, |
||||
+ "Invalid dash in bracket expression."); |
||||
} |
||||
else if (_M_match_token(_ScannerT::_S_token_quoted_class)) |
||||
{ |
||||
- __flush(); |
||||
+ __push_class(); |
||||
__matcher._M_add_character_class(_M_value, |
||||
_M_ctype.is(_CtypeT::upper, |
||||
_M_value[0])); |
||||
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc |
||||
index 7df70604ea6..0d76e63da7b 100644 |
||||
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc |
||||
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc |
||||
@@ -69,6 +69,16 @@ test01() |
||||
void |
||||
test02() |
||||
{ |
||||
+ VERIFY(regex_match("-", regex("[-]", regex_constants::ECMAScript))); |
||||
+ VERIFY(regex_match("-", regex("[--]", regex_constants::ECMAScript))); |
||||
+ VERIFY(regex_match("-", regex("[---]", regex_constants::ECMAScript))); |
||||
+ VERIFY(regex_match("-", regex("[----]", regex_constants::ECMAScript))); |
||||
+ VERIFY(regex_match("-", regex("[-----]", regex_constants::ECMAScript))); |
||||
+ |
||||
+ VERIFY(regex_match("-", regex("[-]", regex_constants::extended))); |
||||
+ VERIFY(regex_match("-", regex("[--]", regex_constants::extended))); |
||||
+ VERIFY(regex_match("-", regex("[---]", regex_constants::extended))); |
||||
+ VERIFY(regex_match("-", regex("[----]", regex_constants::extended))); |
||||
try |
||||
{ |
||||
std::regex re("[-----]", std::regex::extended); |
||||
@@ -78,7 +88,6 @@ test02() |
||||
{ |
||||
VERIFY(e.code() == std::regex_constants::error_range); |
||||
} |
||||
- std::regex re("[-----]", std::regex::ECMAScript); |
||||
|
||||
VERIFY(!regex_match("b", regex("[-ac]", regex_constants::extended))); |
||||
VERIFY(!regex_match("b", regex("[ac-]", regex_constants::extended))); |
||||
@@ -93,7 +102,27 @@ test02() |
||||
} |
||||
catch (const std::regex_error& e) |
||||
{ |
||||
+ VERIFY(e.code() == std::regex_constants::error_range); |
||||
+ } |
||||
+ try |
||||
+ { |
||||
+ regex("[@--]", regex_constants::extended); |
||||
+ VERIFY(false); |
||||
} |
||||
+ catch (const std::regex_error& e) |
||||
+ { |
||||
+ VERIFY(e.code() == std::regex_constants::error_range); |
||||
+ } |
||||
+ try |
||||
+ { |
||||
+ regex("[--%]", regex_constants::extended); |
||||
+ VERIFY(false); |
||||
+ } |
||||
+ catch (const std::regex_error& e) |
||||
+ { |
||||
+ VERIFY(e.code() == std::regex_constants::error_range); |
||||
+ } |
||||
+ |
||||
VERIFY(regex_match("].", regex("[][.hyphen.]-0]*", regex_constants::extended))); |
||||
} |
||||
|
||||
@@ -158,6 +187,36 @@ test06() |
||||
VERIFY(regex_match("a-", debian_cron_namespace_ok)); |
||||
} |
||||
|
||||
+// libstdc++/102447 |
||||
+void |
||||
+test07() |
||||
+{ |
||||
+ VERIFY(regex_match("-", std::regex("[\\w-]", std::regex::ECMAScript))); |
||||
+ VERIFY(regex_match("a", std::regex("[\\w-]", std::regex::ECMAScript))); |
||||
+ VERIFY(regex_match("-", std::regex("[a-]", std::regex::ECMAScript))); |
||||
+ VERIFY(regex_match("a", std::regex("[a-]", std::regex::ECMAScript))); |
||||
+ |
||||
+ try |
||||
+ { |
||||
+ std::regex re("[\\w-a]", std::regex::ECMAScript); |
||||
+ VERIFY(false); |
||||
+ } |
||||
+ catch (const std::regex_error& e) |
||||
+ { |
||||
+ VERIFY(e.code() == std::regex_constants::error_range); |
||||
+ } |
||||
+ |
||||
+ try |
||||
+ { |
||||
+ std::regex re("[\\w--]", std::regex::ECMAScript); |
||||
+ VERIFY(false); |
||||
+ } |
||||
+ catch (const std::regex_error& e) |
||||
+ { |
||||
+ VERIFY(e.code() == std::regex_constants::error_range); |
||||
+ } |
||||
+} |
||||
+ |
||||
int |
||||
main() |
||||
{ |
||||
@@ -167,6 +226,7 @@ main() |
||||
test04(); |
||||
test05(); |
||||
test06(); |
||||
+ test07(); |
||||
|
||||
return 0; |
||||
} |
Loading…
Reference in new issue