/* test-glibc.c -- API trace test extracted from the glibc AI_IDN tests.
Copyright (C) 2019 Red Hat, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* Before changing test expectations in this file, please contact the
glibc developers on the libc-alpha mailing list to check if these
changes are benign and will not lead to glibc test suite failures.
Thanks. */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* Sample strings used to probe the character set of the current
   locale.  The wide strings store Unicode code points directly, which
   assumes that wchar_t uses a UTF-16 or UTF-32 encoding.  */

/* "n", U+00E4, "mchen": wide form, then the same text as ISO-8859-1
   bytes and as UTF-8 bytes.  */
static const wchar_t L_naemchen[] =
  { L'n', 0xE4, L'm', L'c', L'h', L'e', L'n', L'\0' };
static const char *naemchen_latin1 = "n\xE4" "mchen";
static const char *naemchen_utf8 = "n\303\244mchen";

/* U+05E9 U+05DD: wide form, then the same text as UTF-8 bytes.  */
static const wchar_t L_shem[] = { 0x5E9, 0x5DD, L'\0' };
static const char *shem_utf8 = "\327\251\327\235";
/* Result of classifying the character set of a locale.  */
enum charset_kind
  {
    /* The locale uses UTF-8.  */
    charset_utf8,
    /* The locale uses a Latin-1 style charset; this value covers both
       ISO-8859-1 and ISO-8859-15.  */
    charset_latin1,
    /* Anything else.  */
    charset_neither
  };
/* Convert the wide string INPUT to a multibyte string in the encoding
   of the current locale and return it in a freshly allocated buffer,
   which the caller releases with free.

   The conversion goes through a fixed-size buffer on the stack, so at
   most sizeof (buf) - 1 bytes of output are produced; a longer result
   is cut off where wcsrtombs stops converting.  If wcsrtombs reports
   a conversion error, the empty string is returned.  If memory
   allocation fails, an error is printed and the process exits with
   EXIT_FAILURE.  */
static char * __attribute__ ((malloc))
wcsrtombs_strdup (const wchar_t *input)
{
  char buf[100];
  const wchar_t *src = input;
  mbstate_t state;
  memset (&state, 0, sizeof (state));
  /* Keep one byte in reserve: wcsrtombs stores no terminating null
     byte when the converted text uses up the whole length it was
     given, and the copy below needs a terminated string.  */
  size_t ret = wcsrtombs (buf, &src, sizeof (buf) - 1, &state);
  if (ret == (size_t) -1)
    ret = 0;
  buf[ret] = '\0';
  /* The length is already known, so copy it directly.  */
  char *result = malloc (ret + 1);
  if (result == NULL)
    {
      puts ("error: memory allocation failure");
      exit (EXIT_FAILURE);
    }
  memcpy (result, buf, ret + 1);
  return result;
}
/* Name of the locale currently under test.  main stores the candidate
   name here before calling setlocale; the check functions use it in
   their diagnostics.  */
static const char *locale;

/* Classify the character set of the current locale.

   Two independent classifications are made: one from the charset name
   returned by locale_charset, and one by converting the sample wide
   strings with wcsrtombs_strdup and comparing the resulting bytes with
   the known UTF-8 and Latin-1 encodings.  If the two disagree, an
   error is printed and the process exits with EXIT_FAILURE.
   Otherwise the common classification is returned.  */
static enum charset_kind
determine_current_charset_kind (void)
{
  /* Classification by charset name.  */
  const char *lc_string = locale_charset ();
  enum charset_kind expected;
  if (strcmp (lc_string, "UTF-8") == 0)
    expected = charset_utf8;
  else if (strcmp (lc_string, "ISO-8859-1") == 0
           || strcmp (lc_string, "ISO-8859-15") == 0
           || strcmp (lc_string, "CP1252") == 0)
    expected = charset_latin1;
  else
    expected = charset_neither;

  /* Classification by observed conversion results.  For Latin-1 the
     second sample must come back as the empty string, which is what
     wcsrtombs_strdup returns when the conversion fails.  */
  char *naemchen_bytes = wcsrtombs_strdup (L_naemchen);
  char *shem_bytes = wcsrtombs_strdup (L_shem);
  enum charset_kind actual;
  if (strcmp (naemchen_bytes, naemchen_utf8) == 0
      && strcmp (shem_bytes, shem_utf8) == 0)
    actual = charset_utf8;
  else if (strcmp (naemchen_bytes, naemchen_latin1) == 0
           && strcmp (shem_bytes, "") == 0)
    actual = charset_latin1;
  else
    actual = charset_neither;
  free (shem_bytes);
  free (naemchen_bytes);

  if (expected != actual)
    {
      /* The integer type compatible with an enumeration is
         implementation-defined, so convert explicitly to the type
         that %u requires.  */
      printf ("error: locale %s: expected charset %u (%s), got %u\n",
              locale, (unsigned int) expected, lc_string,
              (unsigned int) actual);
      exit (EXIT_FAILURE);
    }
  return actual;
}
/* Number of failed checks recorded so far.  main returns EXIT_FAILURE
   when this is nonzero.  */
static int errors;

/* Evaluate the outcome of a conversion that is expected to succeed.
   FUNC is the name of the function that was called (used only in
   diagnostics), INPUT the string passed to it, EXPECTED the required
   output, RET the return code of the call, and ACTUAL the output
   string it produced.

   A nonzero RET or an ACTUAL that differs from EXPECTED is reported
   and counted in errors.  ACTUAL is released with idn2_free whenever
   RET is zero; it is not touched otherwise.  */
static void
check_success (const char *func, const char *input, const char *expected,
               int ret, char *actual)
{
  /* The call itself failed: report the return code and stop.  */
  if (ret != 0)
    {
      printf ("error: locale %s: %s: input \"%s\": %d\n",
              locale, func, input, ret);
      errors++;
      return;
    }

  /* The call succeeded; make sure it produced the right string.  */
  if (strcmp (actual, expected) != 0)
    {
      printf ("error: locale %s: %s: input \"%s\": \"%s\"\n",
              locale, func, input, actual);
      errors++;
    }

  idn2_free (actual);
}
/* Call idn2_lookup_ul on INPUT with flags 0 and hand the return code
   and output string to check_success, which verifies that the call
   succeeded and produced EXPECTED.  */
static void
check_lookup_ul_success (const char *input, const char *expected)
{
  char *output = NULL;
  int status;

  status = idn2_lookup_ul (input, &output, 0);
  check_success ("idn2_lookup_ul", input, expected, status, output);
}
/* Call idn2_to_unicode_lzlz on INPUT with flags 0 and hand the return
   code and output string to check_success, which verifies that the
   call succeeded and produced EXPECTED.  */
static void
check_to_unicode_lzlz_success (const char *input, const char *expected)
{
  char *output = NULL;
  int status;

  status = idn2_to_unicode_lzlz (input, &output, 0);
  check_success ("idn2_to_unicode_lzlz", input, expected, status, output);
}
/* Call idn2_to_unicode_lzlz on INPUT with flags 0 and verify that it
   fails with the return code EXPECTED.

   An unexpected success is reported together with the string that was
   produced, counted in errors, and the string is released with
   idn2_free.  A failure with a return code other than EXPECTED is
   reported and counted as well.  */
static void
check_to_unicode_lzlz_failure (const char *input, int expected)
{
  char *unexpected = NULL;
  int actual = idn2_to_unicode_lzlz (input, &unexpected, 0);
  if (actual == 0)
    {
      /* The two literals are concatenated by the compiler, so the
         separating space after the colon has to be written out.  */
      printf ("error: idn2_to_unicode_lzlz: locale %s: "
              "unexpected success for input \"%s\": \"%s\"\n",
              locale, input, unexpected);
      ++errors;
      idn2_free (unexpected);
    }
  else if (actual != expected)
    {
      printf ("error: idn2_to_unicode_lzlz: locale %s: "
              "expected failure %d for input \"%s\", actual %d\n",
              locale, expected, input, actual);
      ++errors;
    }
}
/* Checks that main runs while a locale classified as charset_utf8 is
   active.  Non-ASCII input and expected output are given as UTF-8
   bytes.  The cases run in three groups, in this order:
   idn2_lookup_ul conversions that must succeed, idn2_to_unicode_lzlz
   conversions that must succeed, and idn2_to_unicode_lzlz calls that
   must fail with a specific return code.  */
static void
run_utf8_tests (void)
{
  struct conversion_case
  {
    const char *input;
    const char *expected;
  };
  struct failure_case
  {
    const char *input;
    int expected;
  };

  const struct conversion_case lookup_cases[] =
    {
      { "\327\251\327\2351.example", "xn--1-qic9a.example" },
      { "\327\251\327\235.example", "xn--iebx.example" },
      { "both.cname.idn-cname.n\303\244mchen.example",
        "both.cname.idn-cname.xn--nmchen-bua.example" },
      { "bu\303\237e.example", "xn--bue-6ka.example" },
      { "n\303\244mchen.example", "xn--nmchen-bua.example" },
      { "n\303\244mchen_zwo.example", "xn--nmchen_zwo-q5a.example" },
      { "with.cname.n\303\244mchen.example",
        "with.cname.xn--nmchen-bua.example" },
      { "With.idn-cname.n\303\244mchen.example",
        "with.idn-cname.xn--nmchen-bua.example" },
    };
  const struct conversion_case to_unicode_cases[] =
    {
      { "non-idn-cname.example", "non-idn-cname.example" },
      { "non-idn.example", "non-idn.example" },
      { "non-idn-name.example", "non-idn-name.example" },
      { "xn--1-qic9a.example", "\327\251\327\2351.example" },
      { "xn--anderes-nmchen-eib.example",
        "anderes-n\303\244mchen.example" },
      { "xn--bue-6ka.example", "bu\303\237e.example" },
      { "xn--iebx.example", "\327\251\327\235.example" },
      { "xn--nmchen-bua.example", "n\303\244mchen.example" },
      { "xn--nmchen_zwo-q5a.example", "n\303\244mchen_zwo.example" },
    };
  const struct failure_case to_unicode_failures[] =
    {
      { "xn---.example", IDN2_PUNYCODE_BAD_INPUT },
      { "xn--x.example", IDN2_PUNYCODE_BAD_INPUT },
    };

  for (size_t i = 0; i < sizeof lookup_cases / sizeof lookup_cases[0]; ++i)
    check_lookup_ul_success (lookup_cases[i].input,
                             lookup_cases[i].expected);

  for (size_t i = 0;
       i < sizeof to_unicode_cases / sizeof to_unicode_cases[0]; ++i)
    check_to_unicode_lzlz_success (to_unicode_cases[i].input,
                                   to_unicode_cases[i].expected);

  for (size_t i = 0;
       i < sizeof to_unicode_failures / sizeof to_unicode_failures[0]; ++i)
    check_to_unicode_lzlz_failure (to_unicode_failures[i].input,
                                   to_unicode_failures[i].expected);
}
/* Checks that main runs while a locale classified as charset_latin1
   is active.  Non-ASCII input and expected output are given as
   single bytes (octal 344 and 337).  The cases run in three groups,
   in this order: idn2_lookup_ul conversions that must succeed,
   idn2_to_unicode_lzlz conversions that must succeed, and
   idn2_to_unicode_lzlz calls that must fail with a specific return
   code.  The first two failure inputs are names that run_utf8_tests
   expects to decode successfully; here IDN2_ENCODING_ERROR is
   required instead.  */
static void
run_latin1_tests (void)
{
  struct conversion_case
  {
    const char *input;
    const char *expected;
  };
  struct failure_case
  {
    const char *input;
    int expected;
  };

  const struct conversion_case lookup_cases[] =
    {
      { "both.cname.idn-cname.n\344mchen.example",
        "both.cname.idn-cname.xn--nmchen-bua.example" },
      { "bu\337e.example", "xn--bue-6ka.example" },
      { "n\344mchen.example", "xn--nmchen-bua.example" },
      { "n\344mchen_zwo.example", "xn--nmchen_zwo-q5a.example" },
      { "with.cname.n\344mchen.example",
        "with.cname.xn--nmchen-bua.example" },
      { "With.idn-cname.n\344mchen.example",
        "with.idn-cname.xn--nmchen-bua.example" },
    };
  const struct conversion_case to_unicode_cases[] =
    {
      { "non-idn-cname.example", "non-idn-cname.example" },
      { "non-idn.example", "non-idn.example" },
      { "non-idn-name.example", "non-idn-name.example" },
      { "xn--anderes-nmchen-eib.example", "anderes-n\344mchen.example" },
      { "xn--bue-6ka.example", "bu\337e.example" },
      { "xn--nmchen-bua.example", "n\344mchen.example" },
      { "xn--nmchen_zwo-q5a.example", "n\344mchen_zwo.example" },
    };
  const struct failure_case to_unicode_failures[] =
    {
      { "xn--1-qic9a.example", IDN2_ENCODING_ERROR },
      { "xn--iebx.example", IDN2_ENCODING_ERROR },
      { "xn---.example", IDN2_PUNYCODE_BAD_INPUT },
      { "xn--x.example", IDN2_PUNYCODE_BAD_INPUT },
    };

  for (size_t i = 0; i < sizeof lookup_cases / sizeof lookup_cases[0]; ++i)
    check_lookup_ul_success (lookup_cases[i].input,
                             lookup_cases[i].expected);

  for (size_t i = 0;
       i < sizeof to_unicode_cases / sizeof to_unicode_cases[0]; ++i)
    check_to_unicode_lzlz_success (to_unicode_cases[i].input,
                                   to_unicode_cases[i].expected);

  for (size_t i = 0;
       i < sizeof to_unicode_failures / sizeof to_unicode_failures[0]; ++i)
    check_to_unicode_lzlz_failure (to_unicode_failures[i].input,
                                   to_unicode_failures[i].expected);
}
/* Locale names that main tries in order, one group per line.  Names
   that setlocale rejects are skipped by main.  The list is terminated
   by a NULL sentinel.  */
static const char *const locale_candidates[] =
  {
    "C", "C.UTF-8",
    "en_US", "en_US.utf8", "en_US.iso88591",
    "de_DE", "de_DE.utf8", "de_DE.iso88591", "de_DE.iso885915@euro",
    "fr_FR", "fr_FR.utf8", "fr_FR.iso88591",
    "he_IL.utf8",
    NULL
  };
int
main (void)
{
bool utf8_seen = false;
bool latin1_seen = false;
for (size_t i = 0; locale_candidates[i] != NULL; ++i)
{
locale = locale_candidates[i];
if (setlocale (LC_ALL, locale) == NULL)
continue;
switch (determine_current_charset_kind ())
{
case charset_utf8:
run_utf8_tests ();
utf8_seen = true;
break;
case charset_latin1:
run_latin1_tests ();
latin1_seen = true;
break;
case charset_neither:
continue;
}
}
if (!utf8_seen)
{
/* Mingw64 does not have a UTF-8 locale. */
#ifndef __MINGW64__
puts ("error: no UTF-8 locale found");
++errors;
#else
puts ("warning: no UTF-8 support on Mingw");
#endif
}
/* Not everyone has a Latin-1 locale installed. */
if (!latin1_seen)
puts ("warning: no Latin-1 locale found");
if (!(utf8_seen || latin1_seen))
{
puts ("error: no usable locales found");
++errors;
}
if (errors)
return EXIT_FAILURE;
else
return EXIT_SUCCESS;
}