From 3af65474cbec9485534ad00d604c442e42095ee5 Mon Sep 17 00:00:00 2001 From: Kamil Dudka Date: Mon, 21 Mar 2022 15:57:49 +0100 Subject: [PATCH] backport IDN2 support from lynx2.9.0dev.10 --- WWW/Library/Implementation/HTParse.c | 49 +++++++++++++++++++++++++--- WWW/Library/Implementation/HTParse.h | 12 +++++++ aclocal.m4 | 31 ++++++++++++++---- config.hin | 1 + src/LYMain.c | 3 ++ src/LYOptions.c | 33 +++++++++++++++++++ src/LYrcFile.c | 14 ++++++++ src/LYrcFile.h | 1 + 8 files changed, 132 insertions(+), 12 deletions(-) diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c index c5d947f..f1f8208 100644 --- a/WWW/Library/Implementation/HTParse.c +++ b/WWW/Library/Implementation/HTParse.c @@ -22,9 +22,14 @@ #endif /* __MINGW32__ */ #endif -#ifdef USE_IDNA +#ifdef USE_IDN2 +#include <idn2.h> +#define FreeIdna(out) idn2_free(out) +#elif defined(USE_IDNA) #include <idna.h> #include <idn-free.h> +#define FreeIdna(out) idn_free(out) +#define IDN2_OK IDNA_SUCCESS #endif #define HEX_ESCAPE '%' @@ -242,7 +247,7 @@ char *HTParsePort(char *host, int *portp) return result; } -#ifdef USE_IDNA +#if defined(USE_IDNA) || defined(USE_IDN2) static int hex_decode(int ch) { int result = -1; @@ -299,8 +304,42 @@ static void convert_to_idna(char *host) } if (code) { *dst = '\0'; +#ifdef USE_IDN2 +#if (!defined(IDN2_VERSION_NUMBER) || IDN2_VERSION_NUMBER < 0x02000003) + /* + * Older libidn2 mishandles STD3, stripping underscores. 
+ */ + if (strchr(buffer, '_') != NULL) { + code = -1; + } else +#endif + switch (LYidnaMode) { + case LYidna2003: + code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL); + break; + case LYidna2008: + /* IDNA2008 rules without the TR46 amendments */ + code = idn2_to_ascii_8z(buffer, &output, 0); + break; + case LYidnaTR46: + code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL + | IDN2_NFC_INPUT); + break; + case LYidnaCompat: + /* IDNA2008 */ + code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL + | IDN2_NFC_INPUT); + if (code == IDN2_DISALLOWED) { + /* IDNA2003 - compatible */ + code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL); + } + break; + } +#else code = idna_to_ascii_8z(buffer, &output, IDNA_USE_STD3_ASCII_RULES); - if (code == IDNA_SUCCESS) { +#endif + if (code == IDN2_OK) { + CTRACE((tfp, "convert_to_idna: `%s' -> `%s': OK\n", buffer, output)); strcpy(host, output); strcat(host, params); } else { @@ -309,7 +348,7 @@ static void convert_to_idna(char *host) idna_strerror((Idna_rc) code))); } if (output) - idn_free(output); + FreeIdna(output); } } free(buffer); @@ -539,7 +578,7 @@ char *HTParse(const char *aName, } } } -#ifdef USE_IDNA +#if defined(USE_IDNA) || defined(USE_IDN2) /* * Depending on locale-support, we could have a literal UTF-8 * string as a host name, or a URL-encoded form of that. diff --git a/WWW/Library/Implementation/HTParse.h b/WWW/Library/Implementation/HTParse.h index ce1bff6..5496d82 100644 --- a/WWW/Library/Implementation/HTParse.h +++ b/WWW/Library/Implementation/HTParse.h @@ -49,6 +49,18 @@ extern "C" { #define URL_XALPHAS UCH(1) #define URL_XPALPHAS UCH(2) #define URL_PATH UCH(4) + +#ifdef USE_IDN2 + typedef enum { + LYidna2003 = 1, + LYidna2008, + LYidnaTR46, + LYidnaCompat + } HTIdnaModes; + + extern int LYidnaMode; +#endif + /* Strip white space off a string. 
HTStrip() * ------------------------------- * diff --git a/aclocal.m4 b/aclocal.m4 index 41139f9..4c68aec 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -3341,11 +3341,12 @@ test -d "$oldincludedir" && { $1="[$]$1 $cf_header_path_list" ])dnl dnl --------------------------------------------------------------------------- -dnl CF_HELP_MESSAGE version: 3 updated: 1998/01/14 10:56:23 +dnl CF_HELP_MESSAGE version: 4 updated: 2019/12/31 08:53:54 dnl --------------- dnl Insert text into the help-message, for readability, from AC_ARG_WITH. AC_DEFUN([CF_HELP_MESSAGE], -[AC_DIVERT_HELP([$1])dnl +[CF_ACVERSION_CHECK(2.53,[],[ +AC_DIVERT_HELP($1)])dnl ])dnl dnl --------------------------------------------------------------------------- dnl CF_INET_ADDR version: 7 updated: 2013/10/08 17:47:05 @@ -6600,25 +6601,41 @@ if test "$with_dmalloc" = yes ; then fi ])dnl dnl --------------------------------------------------------------------------- -dnl CF_WITH_IDNA version: 10 updated: 2015/04/15 19:08:48 +dnl CF_WITH_IDNA version: 11 updated: 2021/07/05 09:09:42 dnl ------------ -dnl Check for libidn, use it if found. +dnl Check for libidn2, use it if found. Otherwise, check for libidn, use that. 
dnl dnl $1 = optional path for headers/library AC_DEFUN([CF_WITH_IDNA],[ - CF_ADD_OPTIONAL_PATH($1) +CF_ADD_OPTIONAL_PATH($1) - CF_FIND_LINKAGE([ +CF_FIND_LINKAGE([ +#include <stdio.h> +#include <idn2.h> +],[ + char *output = 0; + int code = idn2_to_ascii_8z("name", &output, IDN2_USE_STD3_ASCII_RULES); + (void) code; +],idn2,,[CF_VERBOSE([unsuccessful, will try idn (older)])],,[$LIBICONV]) + +if test "x$cf_cv_find_linkage_idn2" = xyes ; then + CF_VERBOSE(found idn2 library) + AC_DEFINE(USE_IDN2,1,[Define to 1 if we should use IDN2 library]) +else + CF_FIND_LINKAGE([ #include <stdio.h> #include <idna.h> ],[ char *output = 0; - int code = idna_to_ascii_8z("name", &output, IDNA_USE_STD3_ASCII_RULES); + int code = idna_to_ascii_8z("name", &output, IDNA_USE_STD3_ASCII_RULES); + (void) code; ],idn,,,,[$LIBICONV]) if test "x$cf_cv_find_linkage_idn" = xyes ; then + CF_VERBOSE(found idn library) AC_DEFINE(USE_IDNA,1,[Define to 1 if we should use IDNA library]) fi +fi ])dnl dnl --------------------------------------------------------------------------- dnl CF_WITH_PATH version: 11 updated: 2012/09/29 15:04:19 diff --git a/config.hin b/config.hin index fb0e4d3..58051bb 100644 --- a/config.hin +++ b/config.hin @@ -271,6 +271,7 @@ #undef USE_FILE_UPLOAD /* CF_ARG_DISABLE(file-upload) */ #undef USE_GNUTLS_FUNCS /* CF_GNUTLS */ #undef USE_GNUTLS_INCL /* CF_GNUTLS */ +#undef USE_IDN2 /* CF_ARG_DISABLE(idna) */ #undef USE_IDNA /* CF_ARG_DISABLE(idna) */ #undef USE_JUSTIFY_ELTS /* CF_ARG_DISABLE(justify-elts) */ #undef USE_LOCALE_CHARSET /* CF_ARG_DISABLE(locale-charset) */ diff --git a/src/LYMain.c b/src/LYMain.c index d36707e..ecfbe69 100644 --- a/src/LYMain.c +++ b/src/LYMain.c @@ -178,6 +178,9 @@ lynx_list_item_type *externals = NULL; /* linked list of external options */ #endif +#ifdef USE_IDN2 +int LYidnaMode = LYidnaTR46; +#endif lynx_list_item_type *uploaders = NULL; int LYShowColor = SHOW_COLOR_UNKNOWN; /* to show or not */ diff --git a/src/LYOptions.c b/src/LYOptions.c index 6b4b0e0..e0e4732 100644 --- 
a/src/LYOptions.c +++ b/src/LYOptions.c @@ -2356,6 +2356,18 @@ static const char *assume_char_set_string = RC_ASSUME_CHARSET; static const char *display_char_set_string = RC_CHARACTER_SET; static const char *raw_mode_string = RC_RAW_MODE; +#ifdef USE_IDN2 +static const char *idna_mode_string = RC_IDNA_MODE; +static OptValues idna_values[] = +{ + {LYidna2003, N_("IDNA 2003"), "idna2003"}, + {LYidna2008, N_("IDNA 2008"), "idna2008"}, + {LYidnaTR46, N_("IDNA TR46"), "idnaTR46"}, + {LYidnaCompat, N_("IDNA Compatible"), "idnaCompat"}, + END_OPTIONS +}; +#endif + #ifdef USE_LOCALE_CHARSET static const char *locale_charset_string = RC_LOCALE_CHARSET; #endif @@ -3233,6 +3245,13 @@ int postoptions(DocInfo *newdoc) current_char_set = newval; } } +#ifdef USE_IDN2 + /* Internationalized Domain Names: SELECT */ + if (!strcmp(data[i].tag, idna_mode_string) + && GetOptValues(idna_values, data[i].value, &code)) { + LYidnaMode = code; + } +#endif /* Raw Mode: ON/OFF */ if (!strcmp(data[i].tag, raw_mode_string) @@ -3933,6 +3952,20 @@ static int gen_options(char **newfile) EndSelect(fp0); } +#ifdef USE_IDN2 + /* Internationalized Domain Names: SELECT */ + { + PutLabel(fp0, gettext("Internationalized domain names"), idna_mode_string); + BeginSelect(fp0, idna_mode_string); + for (i = 0; idna_values[i].value != 0; i++) { + PutOption(fp0, idna_values[i].value == LYidnaMode, + idna_values[i].HtmlName, + idna_values[i].LongName); + } + EndSelect(fp0); + } +#endif + /* Raw Mode: ON/OFF */ if (LYHaveCJKCharacterSet) { /* diff --git a/src/LYrcFile.c b/src/LYrcFile.c index 1754f12..b98bfc2 100644 --- a/src/LYrcFile.c +++ b/src/LYrcFile.c @@ -71,6 +71,16 @@ static Config_Enum tbl_file_sort[] = { { NULL, -1 }, }; +#ifdef USE_IDN2 +static Config_Enum tbl_idna_mode[] = { + { "IDNA2003", LYidna2003 }, + { "IDNA2008", LYidna2008 }, + { "TR46", LYidnaTR46 }, + { "Compatible", LYidnaCompat }, + { NULL, -1 }, +}; +#endif + Config_Enum tbl_keypad_mode[] = { { "FIELDS_ARE_NUMBERED", FIELDS_ARE_NUMBERED 
}, { "LINKS_AND_FIELDS_ARE_NUMBERED", LINKS_AND_FIELDS_ARE_NUMBERED }, @@ -462,6 +472,10 @@ file lists such as FTP directories. The options are:\n\ MAYBE_SET(RC_HTML5_CHARSETS, html5_charsets, MSG_ENABLE_LYNXRC), MAYBE_FUN(RC_HTTP_PROTOCOL, get_http_protocol, put_http_protocol, MSG_ENABLE_LYNXRC), +#ifdef USE_IDN2 + MAYBE_ENU(RC_IDNA_MODE, LYidnaMode, tbl_idna_mode, + MSG_ENABLE_LYNXRC), +#endif #ifdef EXP_KEYBOARD_LAYOUT PARSE_ARY(RC_KBLAYOUT, current_layout, LYKbLayoutNames, NULL), #endif diff --git a/src/LYrcFile.h b/src/LYrcFile.h index 3cf07c0..cd41a0f 100644 --- a/src/LYrcFile.h +++ b/src/LYrcFile.h @@ -110,6 +110,7 @@ #define RC_HTTPS_PROXY "https_proxy" #define RC_HTTP_PROTOCOL "http_protocol" #define RC_HTTP_PROXY "http_proxy" +#define RC_IDNA_MODE "idna_mode" #define RC_INCLUDE "include" #define RC_INFLATE_PATH "inflate_path" #define RC_INFOSECS "infosecs" -- 2.34.1