You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
178 lines
6.8 KiB
178 lines
6.8 KiB
3 years ago
|
commit f411207a833d0c49578ebe7062aee3660813ed5f
|
||
|
Author: Nikita Popov <npv1310@gmail.com>
|
||
|
Date: Tue Nov 2 13:21:42 2021 +0500
|
||
|
|
||
|
gconv: Do not emit spurious NUL character in ISO-2022-JP-3 (bug 28524)
|
||
|
|
||
|
Bugfix 27256 has introduced another issue:
|
||
|
In conversion from ISO-2022-JP-3 encoding, it is possible
|
||
|
to force iconv to emit an extra NUL character on internal state reset.
|
||
|
To do this, it is sufficient to feed iconv with an escape sequence
|
||
|
which switches the active character set.
|
||
|
The simplified check 'data->__statep->__count != ASCII_set'
|
||
|
introduced by the aforementioned bugfix picks that case and
|
||
|
behaves as if a '\0' character had been queued, thus emitting it.
|
||
|
|
||
|
To eliminate this issue, these steps are taken:
|
||
|
* Restore original condition
|
||
|
'(data->__statep->__count & ~7) != ASCII_set'.
|
||
|
It is necessary since bits 0-2 may contain
|
||
|
the number of buffered input characters.
|
||
|
* Check that queued character is not NUL.
|
||
|
A similar step is taken for the main conversion loop.
|
||
|
|
||
|
The bundled test case follows this logic:
|
||
|
* Try to convert ISO-2022-JP-3 escape sequence
|
||
|
switching active character set
|
||
|
* Reset internal state by providing NULL as input buffer
|
||
|
* Ensure that nothing has been converted.
|
||
|
|
||
|
Signed-off-by: Nikita Popov <npv1310@gmail.com>
|
||
|
(cherry picked from commit ff012870b2c02a62598c04daa1e54632e020fd7d)
|
||
|
|
||
|
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
|
||
|
index c216f959df1413f8..d5507a048c6a6508 100644
|
||
|
--- a/iconvdata/Makefile
|
||
|
+++ b/iconvdata/Makefile
|
||
|
@@ -1,4 +1,5 @@
|
||
|
# Copyright (C) 1997-2021 Free Software Foundation, Inc.
|
||
|
+# Copyright (C) The GNU Toolchain Authors.
|
||
|
# This file is part of the GNU C Library.
|
||
|
|
||
|
# The GNU C Library is free software; you can redistribute it and/or
|
||
|
@@ -74,7 +75,7 @@ ifeq (yes,$(build-shared))
|
||
|
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
|
||
|
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
|
||
|
bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
|
||
|
- bug-iconv13 bug-iconv14
|
||
|
+ bug-iconv13 bug-iconv14 bug-iconv15
|
||
|
ifeq ($(have-thread-library),yes)
|
||
|
tests += bug-iconv3
|
||
|
endif
|
||
|
@@ -327,6 +328,8 @@ $(objpfx)bug-iconv12.out: $(addprefix $(objpfx), $(gconv-modules)) \
|
||
|
$(addprefix $(objpfx),$(modules.so))
|
||
|
$(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
|
||
|
$(addprefix $(objpfx),$(modules.so))
|
||
|
+$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
|
||
|
+ $(addprefix $(objpfx),$(modules.so))
|
||
|
|
||
|
$(objpfx)iconv-test.out: run-iconv-test.sh \
|
||
|
$(addprefix $(objpfx), $(gconv-modules)) \
|
||
|
diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c
|
||
|
new file mode 100644
|
||
|
index 0000000000000000..cc04bd0313a68786
|
||
|
--- /dev/null
|
||
|
+++ b/iconvdata/bug-iconv15.c
|
||
|
@@ -0,0 +1,60 @@
|
||
|
+/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
|
||
|
+ may emit spurious NUL character on state reset.
|
||
|
+ Copyright (C) The GNU Toolchain Authors.
|
||
|
+ This file is part of the GNU C Library.
|
||
|
+
|
||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||
|
+ modify it under the terms of the GNU Lesser General Public
|
||
|
+ License as published by the Free Software Foundation; either
|
||
|
+ version 2.1 of the License, or (at your option) any later version.
|
||
|
+
|
||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
+ Lesser General Public License for more details.
|
||
|
+
|
||
|
+ You should have received a copy of the GNU Lesser General Public
|
||
|
+ License along with the GNU C Library; if not, see
|
||
|
+ <https://www.gnu.org/licenses/>. */
|
||
|
+
|
||
|
+#include <stddef.h>
|
||
|
+#include <iconv.h>
|
||
|
+#include <support/check.h>
|
||
|
+
|
||
|
+static int
|
||
|
+do_test (void)
|
||
|
+{
|
||
|
+ char in[] = "\x1b(I";
|
||
|
+ char *inbuf = in;
|
||
|
+ size_t inleft = sizeof (in) - 1;
|
||
|
+ char out[1];
|
||
|
+ char *outbuf = out;
|
||
|
+ size_t outleft = sizeof (out);
|
||
|
+ iconv_t cd;
|
||
|
+
|
||
|
+ cd = iconv_open ("UTF8", "ISO-2022-JP-3");
|
||
|
+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
|
||
|
+
|
||
|
+ /* First call to iconv should alter internal state.
|
||
|
+ Now, JISX0201_Kana_set is selected and
|
||
|
+ state value != ASCII_set. */
|
||
|
+ TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);
|
||
|
+
|
||
|
+ /* No bytes should have been added to
|
||
|
+ the output buffer at this point. */
|
||
|
+ TEST_VERIFY (outbuf == out);
|
||
|
+ TEST_VERIFY (outleft == sizeof (out));
|
||
|
+
|
||
|
+ /* Second call shall emit spurious NUL character in unpatched glibc. */
|
||
|
+ TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);
|
||
|
+
|
||
|
+ /* No characters are expected to be produced. */
|
||
|
+ TEST_VERIFY (outbuf == out);
|
||
|
+ TEST_VERIFY (outleft == sizeof (out));
|
||
|
+
|
||
|
+ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+#include <support/test-driver.c>
|
||
|
diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
|
||
|
index c8ba88cdc9fe9200..5fc0c0f7397935fe 100644
|
||
|
--- a/iconvdata/iso-2022-jp-3.c
|
||
|
+++ b/iconvdata/iso-2022-jp-3.c
|
||
|
@@ -1,5 +1,6 @@
|
||
|
/* Conversion module for ISO-2022-JP-3.
|
||
|
Copyright (C) 1998-2021 Free Software Foundation, Inc.
|
||
|
+ Copyright (C) The GNU Toolchain Authors.
|
||
|
This file is part of the GNU C Library.
|
||
|
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998,
|
||
|
and Bruno Haible <bruno@clisp.org>, 2002.
|
||
|
@@ -81,20 +82,31 @@ enum
|
||
|
the output state to the initial state. This has to be done during the
|
||
|
flushing. */
|
||
|
#define EMIT_SHIFT_TO_INIT \
|
||
|
- if (data->__statep->__count != ASCII_set) \
|
||
|
+ if ((data->__statep->__count & ~7) != ASCII_set) \
|
||
|
{ \
|
||
|
if (FROM_DIRECTION) \
|
||
|
{ \
|
||
|
- if (__glibc_likely (outbuf + 4 <= outend)) \
|
||
|
+ uint32_t ch = data->__statep->__count >> 6; \
|
||
|
+ \
|
||
|
+ if (__glibc_unlikely (ch != 0)) \
|
||
|
{ \
|
||
|
- /* Write out the last character. */ \
|
||
|
- *((uint32_t *) outbuf) = data->__statep->__count >> 6; \
|
||
|
- outbuf += sizeof (uint32_t); \
|
||
|
- data->__statep->__count = ASCII_set; \
|
||
|
+ if (__glibc_likely (outbuf + 4 <= outend)) \
|
||
|
+ { \
|
||
|
+ /* Write out the last character. */ \
|
||
|
+ put32u (outbuf, ch); \
|
||
|
+ outbuf += 4; \
|
||
|
+ data->__statep->__count &= 7; \
|
||
|
+ data->__statep->__count |= ASCII_set; \
|
||
|
+ } \
|
||
|
+ else \
|
||
|
+ /* We don't have enough room in the output buffer. */ \
|
||
|
+ status = __GCONV_FULL_OUTPUT; \
|
||
|
} \
|
||
|
else \
|
||
|
- /* We don't have enough room in the output buffer. */ \
|
||
|
- status = __GCONV_FULL_OUTPUT; \
|
||
|
+ { \
|
||
|
+ data->__statep->__count &= 7; \
|
||
|
+ data->__statep->__count |= ASCII_set; \
|
||
|
+ } \
|
||
|
} \
|
||
|
else \
|
||
|
{ \
|