/* idna.c - implementation of high-level IDNA processing function Copyright (C) 2011-2017 Simon Josefsson Libidn2 is free software: you can redistribute it and/or modify it under the terms of either: * the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. or * the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. or both in parallel, as here. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received copies of the GNU General Public License and the GNU Lesser General Public License along with this program. If not, see . */ #include #include /* free */ #include /* errno */ #include "idn2.h" #include "bidi.h" #include "tables.h" #include "context.h" #include "tr46map.h" #include #include /* uc_is_general_category, UC_CATEGORY_M */ #include /* u32_normalize */ #include /* u8_to_u32 */ #include "idna.h" /* * NFC Quick Check from * http://unicode.org/reports/tr15/#Detecting_Normalization_Forms * * They say, this is much faster than 'brute force' normalization. * Strings are very likely already in NFC form. */ G_GNUC_IDN2_ATTRIBUTE_PURE static int _isNFC (uint32_t * label, size_t len) { int lastCanonicalClass = 0; int result = 1; size_t it; for (it = 0; it < len; it++) { uint32_t ch = label[it]; // supplementary code point if (ch >= 0x10000) it++; int canonicalClass = uc_combining_class (ch); if (lastCanonicalClass > canonicalClass && canonicalClass != 0) return 0; NFCQCMap *map = get_nfcqc_map (ch); if (map) { if (map->check) return 0; result = -1; } lastCanonicalClass = canonicalClass; } return result; } int _idn2_u8_to_u32_nfc (const uint8_t * src, size_t srclen, uint32_t ** out, size_t * outlen, int nfc) { uint32_t *p; size_t plen; p = u8_to_u32 (src, srclen, NULL, &plen); if (p == NULL) { if (errno == ENOMEM) return IDN2_MALLOC; return IDN2_ENCODING_ERROR; } if (nfc && !_isNFC (p, plen)) { size_t tmplen; uint32_t *tmp = u32_normalize (UNINORM_NFC, p, plen, NULL, &tmplen); free (p); if (tmp == NULL) { if (errno == ENOMEM) return IDN2_MALLOC; return IDN2_NFC; } p = tmp; plen = tmplen; } *out = p; *outlen = plen; return IDN2_OK; } bool _idn2_ascii_p (const uint8_t * src, size_t srclen) { size_t i; for (i = 0; i < srclen; i++) if (src[i] >= 0x80) return false; return true; } int _idn2_label_test (int what, const uint32_t * label, size_t llen) { if (what & TEST_NFC) { size_t plen; uint32_t *p = u32_normalize (UNINORM_NFC, label, llen, NULL, &plen); int ok; if (p == NULL) { if (errno == ENOMEM) return IDN2_MALLOC; return IDN2_NFC; } ok = llen == plen && memcmp (label, p, plen * sizeof (*label)) == 0; free (p); if (!ok) return IDN2_NOT_NFC; } if (what & TEST_2HYPHEN) { if (llen >= 4 && label[2] == '-' && label[3] == '-') return IDN2_2HYPHEN; } if (what & TEST_HYPHEN_STARTEND) { if (llen > 0 && (label[0] == '-' || label[llen - 1] == '-')) return IDN2_HYPHEN_STARTEND; } if (what & TEST_LEADING_COMBINING) { if (llen > 0 && uc_is_general_category (label[0], UC_CATEGORY_M)) return IDN2_LEADING_COMBINING; } if (what & TEST_DISALLOWED) { size_t i; for (i = 0; i < llen; i++) if (_idn2_disallowed_p (label[i])) { if ((what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) && (what & TEST_ALLOW_STD3_DISALLOWED)) { IDNAMap map; get_idna_map (label[i], &map); if (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) || map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED)) continue; } return IDN2_DISALLOWED; } } if (what & TEST_CONTEXTJ) { size_t i; for (i = 0; i < llen; i++) if (_idn2_contextj_p (label[i])) return IDN2_CONTEXTJ; } if (what & TEST_CONTEXTJ_RULE) { size_t i; int rc; for (i = 0; i < llen; i++) { rc = _idn2_contextj_rule (label, llen, i); if (rc != IDN2_OK) return rc; } } if (what & TEST_CONTEXTO) { size_t i; for (i = 0; i < llen; i++) if (_idn2_contexto_p (label[i])) return IDN2_CONTEXTO; } if (what & TEST_CONTEXTO_WITH_RULE) { size_t i; for (i = 0; i < llen; i++) if (_idn2_contexto_p (label[i]) && !_idn2_contexto_with_rule (label[i])) return IDN2_CONTEXTO_NO_RULE; } if (what & TEST_CONTEXTO_RULE) { size_t i; int rc; for (i = 0; i < llen; i++) { rc = _idn2_contexto_rule (label, llen, i); if (rc != IDN2_OK) return rc; } } if (what & TEST_UNASSIGNED) { size_t i; for (i = 0; i < llen; i++) if (_idn2_unassigned_p (label[i])) return IDN2_UNASSIGNED; } if (what & TEST_BIDI) { int rc = _idn2_bidi (label, llen); if (rc != IDN2_OK) return rc; } if (what & (TEST_TRANSITIONAL | TEST_NONTRANSITIONAL)) { size_t i; int transitional = what & TEST_TRANSITIONAL; /* TR46: 4. The label must not contain a U+002E ( . ) FULL STOP */ for (i = 0; i < llen; i++) if (label[i] == 0x002E) return IDN2_DOT_IN_LABEL; /* TR46: 6. Each code point in the label must only have certain status * values according to Section 5, IDNA Mapping Table: * a. For Transitional Processing, each value must be valid. * b. For Nontransitional Processing, each value must be either valid or deviation. */ for (i = 0; i < llen; i++) { IDNAMap map; get_idna_map (label[i], &map); if (map_is (&map, TR46_FLG_VALID) || (!transitional && map_is (&map, TR46_FLG_DEVIATION))) continue; if (what & TEST_ALLOW_STD3_DISALLOWED && (map_is (&map, TR46_FLG_DISALLOWED_STD3_VALID) || map_is (&map, TR46_FLG_DISALLOWED_STD3_MAPPED))) continue; return transitional ? IDN2_INVALID_TRANSITIONAL : IDN2_INVALID_NONTRANSITIONAL; } } return IDN2_OK; }