/* test-glibc.c -- API trace test extracted from the glibc AI_IDN tests. Copyright (C) 2019 Red Hat, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* Before changing test expecations in this file, please contact the glibc developers on the libc-alpha mailing list to check if these changes are benign and will not lead to glibc test suite failures. Thanks. */ #include #include #include #include #include #include #include #include #include #include /* This assumes that wchar_t uses a UTF-16 or UTF-32 encoding. */ static const wchar_t L_naemchen[] = { L'n', 0344, L'm', L'c', L'h', L'e', L'n', 0 }; static const char *naemchen_latin1 = "n\344mchen"; static const char *naemchen_utf8 = "n\xC3\xA4mchen"; static const wchar_t L_shem[] = { 0x05E9, 0x05DD, 0 }; static const char *shem_utf8 = "\xD7\xA9\xD7\x9D"; /* Detected charset. Note that charset_latin1 covers both ISO-8859-1 and ISO-8859-15. */ enum charset_kind { charset_utf8, charset_latin1, charset_neither }; /* wcsrtombs with a static buffer. */ static char * __attribute__ ((malloc)) wcsrtombs_strdup (const wchar_t *input) { char buf[100]; const wchar_t *src = input; mbstate_t state; memset (&state, 0, sizeof (state)); size_t ret = wcsrtombs (buf, &src, sizeof (buf), &state); if (ret == (size_t) -1) buf[0] = '\0'; char *result = strdup (buf); if (result == NULL) { puts ("error: memory allocation failure"); exit (EXIT_FAILURE); } return result; } static const char *locale; static enum charset_kind determine_current_charset_kind (void) { const char *lc_string = locale_charset (); enum charset_kind expected; if (strcmp (lc_string, "UTF-8") == 0) expected = charset_utf8; else if (strcmp (lc_string, "ISO-8859-1") == 0 || strcmp (lc_string, "ISO-8859-15") == 0 || strcmp (lc_string, "CP1252") == 0) expected = charset_latin1; else expected = charset_neither; char *naemchen_bytes = wcsrtombs_strdup (L_naemchen); char *shem_bytes = wcsrtombs_strdup (L_shem); enum charset_kind actual; if (strcmp (naemchen_bytes, naemchen_utf8) == 0 && strcmp (shem_bytes, shem_utf8) == 0) actual = charset_utf8; else if (strcmp (naemchen_bytes, naemchen_latin1) == 0 && strcmp (shem_bytes, "") == 0) actual = charset_latin1; else actual = charset_neither; free (shem_bytes); free (naemchen_bytes); if (expected != actual) { printf ("error: locale %s: expected charset %u (%s), got %u\n", locale, expected, lc_string, actual); exit (EXIT_FAILURE); } return actual; } static int errors; static void check_success (const char *func, const char *input, const char *expected, int ret, char *actual) { if (ret != 0) { printf ("error: locale %s: %s: input \"%s\": %d\n", locale, func, input, ret); ++errors; } else { if (strcmp (actual, expected) != 0) { printf ("error: locale %s: %s: input \"%s\": \"%s\"\n", locale,func, input, actual); ++errors; } idn2_free (actual); } } static void check_lookup_ul_success (const char *input, const char *expected) { char *actual = NULL; int ret = idn2_lookup_ul (input, &actual, 0); check_success ("idn2_lookup_ul", input, expected, ret, actual); } static void check_to_unicode_lzlz_success (const char *input, const char *expected) { char *actual = NULL; int ret = idn2_to_unicode_lzlz (input, &actual, 0); check_success ("idn2_to_unicode_lzlz", input, expected, ret, actual); } static void check_to_unicode_lzlz_failure (const char *input, int expected) { char *unexpected = NULL; int actual = idn2_to_unicode_lzlz (input, &unexpected, 0); if (actual == 0) { printf ("error: idn2_to_unicode_lzlz: locale %s:" "unexpected success for input \"%s\": \"%s\"\n", locale, input, unexpected); ++errors; idn2_free (unexpected); } else if (actual != expected) { printf ("error: idn2_to_unicode_lzlz: locale %s:" "expected failure %d for input \"%s\", actual %d\n", locale, expected, input, actual); ++errors; } } static void run_utf8_tests (void) { check_lookup_ul_success ("\327\251\327\2351.example", "xn--1-qic9a.example"); check_lookup_ul_success ("\327\251\327\235.example", "xn--iebx.example"); check_lookup_ul_success ("both.cname.idn-cname.n\303\244mchen.example", "both.cname.idn-cname.xn--nmchen-bua.example"); check_lookup_ul_success ("bu\303\237e.example", "xn--bue-6ka.example"); check_lookup_ul_success ("n\303\244mchen.example", "xn--nmchen-bua.example"); check_lookup_ul_success ("n\303\244mchen_zwo.example", "xn--nmchen_zwo-q5a.example"); check_lookup_ul_success ("with.cname.n\303\244mchen.example", "with.cname.xn--nmchen-bua.example"); check_lookup_ul_success ("With.idn-cname.n\303\244mchen.example", "with.idn-cname.xn--nmchen-bua.example"); check_to_unicode_lzlz_success ("non-idn-cname.example", "non-idn-cname.example"); check_to_unicode_lzlz_success ("non-idn.example", "non-idn.example"); check_to_unicode_lzlz_success ("non-idn-name.example", "non-idn-name.example"); check_to_unicode_lzlz_success ("xn--1-qic9a.example", "\327\251\327\2351.example"); check_to_unicode_lzlz_success ("xn--anderes-nmchen-eib.example", "anderes-n\303\244mchen.example"); check_to_unicode_lzlz_success ("xn--bue-6ka.example", "bu\303\237e.example"); check_to_unicode_lzlz_success ("xn--iebx.example", "\327\251\327\235.example"); check_to_unicode_lzlz_success ("xn--nmchen-bua.example", "n\303\244mchen.example"); check_to_unicode_lzlz_success ("xn--nmchen_zwo-q5a.example", "n\303\244mchen_zwo.example"); check_to_unicode_lzlz_failure ("xn---.example", IDN2_PUNYCODE_BAD_INPUT); check_to_unicode_lzlz_failure ("xn--x.example", IDN2_PUNYCODE_BAD_INPUT); } static void run_latin1_tests (void) { check_lookup_ul_success ("both.cname.idn-cname.n\344mchen.example", "both.cname.idn-cname.xn--nmchen-bua.example"); check_lookup_ul_success ("bu\337e.example", "xn--bue-6ka.example"); check_lookup_ul_success ("n\344mchen.example", "xn--nmchen-bua.example"); check_lookup_ul_success ("n\344mchen_zwo.example", "xn--nmchen_zwo-q5a.example"); check_lookup_ul_success ("with.cname.n\344mchen.example", "with.cname.xn--nmchen-bua.example"); check_lookup_ul_success ("With.idn-cname.n\344mchen.example", "with.idn-cname.xn--nmchen-bua.example"); check_to_unicode_lzlz_success ("non-idn-cname.example", "non-idn-cname.example"); check_to_unicode_lzlz_success ("non-idn.example", "non-idn.example"); check_to_unicode_lzlz_success ("non-idn-name.example", "non-idn-name.example"); check_to_unicode_lzlz_success ("xn--anderes-nmchen-eib.example", "anderes-n\344mchen.example"); check_to_unicode_lzlz_success ("xn--bue-6ka.example", "bu\337e.example"); check_to_unicode_lzlz_success ("xn--nmchen-bua.example", "n\344mchen.example"); check_to_unicode_lzlz_success ("xn--nmchen_zwo-q5a.example", "n\344mchen_zwo.example"); check_to_unicode_lzlz_failure ("xn--1-qic9a.example", IDN2_ENCODING_ERROR); check_to_unicode_lzlz_failure ("xn--iebx.example", IDN2_ENCODING_ERROR); check_to_unicode_lzlz_failure ("xn---.example", IDN2_PUNYCODE_BAD_INPUT); check_to_unicode_lzlz_failure ("xn--x.example", IDN2_PUNYCODE_BAD_INPUT); } static const char *const locale_candidates[] = { "C", "C.UTF-8", "en_US", "en_US.utf8", "en_US.iso88591", "de_DE", "de_DE.utf8", "de_DE.iso88591", "de_DE.iso885915@euro", "fr_FR", "fr_FR.utf8", "fr_FR.iso88591", "he_IL.utf8", NULL }; int main (void) { bool utf8_seen = false; bool latin1_seen = false; for (size_t i = 0; locale_candidates[i] != NULL; ++i) { locale = locale_candidates[i]; if (setlocale (LC_ALL, locale) == NULL) continue; switch (determine_current_charset_kind ()) { case charset_utf8: run_utf8_tests (); utf8_seen = true; break; case charset_latin1: run_latin1_tests (); latin1_seen = true; break; case charset_neither: continue; } } if (!utf8_seen) { /* Mingw64 does not have a UTF-8 locale. */ #ifndef __MINGW64__ puts ("error: no UTF-8 locale found"); ++errors; #else puts ("warning: no UTF-8 support on Mingw"); #endif } /* Not everyone has a Latin-1 locale installed. */ if (!latin1_seen) puts ("warning: no Latin-1 locale found"); if (!(utf8_seen || latin1_seen)) { puts ("error: no usable locales found"); ++errors; } if (errors) return EXIT_FAILURE; else return EXIT_SUCCESS; }