Mercurial
comparison third_party/libuv/test/test-idna.c @ 160:948de3f54cea
[ThirdParty] Added libuv
| author | June Park <parkjune1995@gmail.com> |
|---|---|
| date | Wed, 14 Jan 2026 19:39:52 -0800 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 159:05cf9467a1c3 | 160:948de3f54cea |
|---|---|
| 1 /* Copyright The libuv project and contributors. All rights reserved. | |
| 2 * | |
| 3 * Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 4 * of this software and associated documentation files (the "Software"), to | |
| 5 * deal in the Software without restriction, including without limitation the | |
| 6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | |
| 7 * sell copies of the Software, and to permit persons to whom the Software is | |
| 8 * furnished to do so, subject to the following conditions: | |
| 9 * | |
| 10 * The above copyright notice and this permission notice shall be included in | |
| 11 * all copies or substantial portions of the Software. | |
| 12 * | |
| 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
| 18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
| 19 * IN THE SOFTWARE. | |
| 20 */ | |
| 21 | |
| 22 #include "task.h" | |
| 23 #define uv__malloc malloc | |
| 24 #include "../src/idna.c" | |
| 25 #include <string.h> | |
| 26 | |
| 27 TEST_IMPL(utf8_decode1) { /* Checks the value returned by uv__utf8_decode1() and the resulting position of p for valid and invalid byte sequences. */ | |
| 28 const char* p; | |
| 29 char b[32]; | |
| 30 int i; | |
| 31 | |
| 32 /* ASCII. */ | |
| 33 p = b; | |
| 34 snprintf(b, sizeof(b), "%c\x7F", 0x00); /* Stores 0x00 (via %c) in b[0] and 0x7F in b[1]. */ | |
| 35 ASSERT_OK(uv__utf8_decode1(&p, b + sizeof(b))); /* First byte is the NUL; the second argument is presumably the end-of-input pointer (decoder not shown here). */ | |
| 36 ASSERT_PTR_EQ(p, b + 1); | |
| 37 ASSERT_EQ(127, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 38 ASSERT_PTR_EQ(p, b + 2); | |
| 39 | |
| 40 /* Two-byte sequences. */ | |
| 41 p = b; | |
| 42 snprintf(b, sizeof(b), "%s", "\xC2\x80\xDF\xBF"); | |
| 43 ASSERT_EQ(128, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 44 ASSERT_PTR_EQ(p, b + 2); | |
| 45 ASSERT_EQ(0x7FF, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 46 ASSERT_PTR_EQ(p, b + 4); | |
| 47 | |
| 48 /* Three-byte sequences. */ | |
| 49 p = b; | |
| 50 snprintf(b, sizeof(b), "%s", "\xE0\xA0\x80\xEF\xBF\xBF"); | |
| 51 ASSERT_EQ(0x800, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 52 ASSERT_PTR_EQ(p, b + 3); | |
| 53 ASSERT_EQ(0xFFFF, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 54 ASSERT_PTR_EQ(p, b + 6); | |
| 55 | |
| 56 /* Four-byte sequences. */ | |
| 57 p = b; | |
| 58 snprintf(b, sizeof(b), "%s", "\xF0\x90\x80\x80\xF4\x8F\xBF\xBF"); | |
| 59 ASSERT_EQ(0x10000, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 60 ASSERT_PTR_EQ(p, b + 4); | |
| 61 ASSERT_EQ(0x10FFFF, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 62 ASSERT_PTR_EQ(p, b + 8); | |
| 63 | |
| 64 /* Four-byte sequences > U+10FFFF; disallowed. */ | |
| 65 p = b; | |
| 66 snprintf(b, sizeof(b), "%s", "\xF4\x90\xC0\xC0\xF7\xBF\xBF\xBF"); | |
| 67 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + sizeof(b))); /* (unsigned) -1 is the result this test expects for every rejected sequence. */ | |
| 68 ASSERT_PTR_EQ(p, b + 4); /* p is expected to have moved past all four bytes even though they were rejected. */ | |
| 69 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 70 ASSERT_PTR_EQ(p, b + 8); | |
| 71 | |
| 72 /* Overlong; disallowed. */ | |
| 73 p = b; | |
| 74 snprintf(b, sizeof(b), "%s", "\xC0\x80\xC1\x80"); | |
| 75 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 76 ASSERT_PTR_EQ(p, b + 2); | |
| 77 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 78 ASSERT_PTR_EQ(p, b + 4); | |
| 79 | |
| 80 /* Surrogate pairs; disallowed. */ | |
| 81 p = b; | |
| 82 snprintf(b, sizeof(b), "%s", "\xED\xA0\x80\xED\xA3\xBF"); | |
| 83 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 84 ASSERT_PTR_EQ(p, b + 3); | |
| 85 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 86 ASSERT_PTR_EQ(p, b + 6); | |
| 87 | |
| 88 /* Simply illegal. */ | |
| 89 p = b; | |
| 90 snprintf(b, sizeof(b), "%s", "\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"); | |
| 91 | |
| 92 for (i = 1; i <= 8; i++) { /* Each of the eight bytes is expected to be rejected on its own, moving p forward by exactly one. */ | |
| 93 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + sizeof(b))); | |
| 94 ASSERT_PTR_EQ(p, b + i); | |
| 95 } | |
| 96 | |
| 97 return 0; | |
| 98 } | |
| 99 | |
| 100 TEST_IMPL(utf8_decode1_overrun) { /* Decodes from one-byte buffers, passing b + 1 as the limit, and checks p never ends up beyond b + 1. */ | |
| 101 const char* p; | |
| 102 char b[1]; | |
| 103 char c[1]; | |
| 104 | |
| 105 /* Single byte. */ | |
| 106 p = b; | |
| 107 b[0] = 0x7F; | |
| 108 ASSERT_EQ(0x7F, uv__utf8_decode1(&p, b + 1)); | |
| 109 ASSERT_PTR_EQ(p, b + 1); | |
| 110 | |
| 111 /* Multi-byte. */ | |
| 112 p = b; | |
| 113 b[0] = 0xC0; /* 0xC0 with no further byte inside the limit b + 1. */ | |
| 114 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + 1)); | |
| 115 ASSERT_PTR_EQ(p, b + 1); | |
| 116 | |
| 117 b[0] = 0x7F; | |
| 118 ASSERT_EQ(UV_EINVAL, uv__idna_toascii(b, b + 0, c, c + 1)); /* Empty input range with a one-byte output buffer: UV_EINVAL expected. */ | |
| 119 ASSERT_EQ(UV_EINVAL, uv__idna_toascii(b, b + 1, c, c + 1)); /* One input byte (0x7F) with a one-byte output buffer: UV_EINVAL expected. */ | |
| 120 | |
| 121 return 0; | |
| 122 } | |
| 123 | |
| 124 /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */ | |
| 125 #ifndef __MVS__ | |
| 126 | |
| 127 #define F(input, err) /* F(input, err): asserts that uv__idna_toascii() returns err for the string literal input. */ \ | |
| 128 do { \ | |
| 129 char d[256] = {0}; \ | |
| 130 static const char s[] = "" input ""; /* The "" ... "" concatenation only compiles when input is a string literal. */ \ | |
| 131 ASSERT_EQ(err, uv__idna_toascii(s, s + sizeof(s) - 1, d, d + sizeof(d))); /* sizeof(s) - 1 leaves the terminating NUL out of the input range. */ \ | |
| 132 } while (0) | |
| 133 | |
| 134 #define T(input, expected) /* T(input, expected): converts the literal input, compares with expected, then converts the result again and compares once more. */ \ | |
| 135 do { \ | |
| 136 long n; \ | |
| 137 char d1[256] = {0}; \ | |
| 138 char d2[256] = {0}; \ | |
| 139 static const char s[] = "" input ""; \ | |
| 140 n = uv__idna_toascii(s, s + sizeof(s) - 1, d1, d1 + sizeof(d1)); \ | |
| 141 ASSERT_EQ(n, sizeof(expected)); /* The asserted return value is sizeof(expected), i.e. the expected text plus its terminating NUL. */ \ | |
| 142 ASSERT_OK(memcmp(d1, expected, n)); \ | |
| 143 /* Sanity check: encoding twice should not change the output. */ \ | |
| 144 n = uv__idna_toascii(d1, d1 + strlen(d1), d2, d2 + sizeof(d2)); \ | |
| 145 ASSERT_EQ(n, sizeof(expected)); \ | |
| 146 ASSERT_OK(memcmp(d2, expected, n)); \ | |
| 147 ASSERT_OK(memcmp(d1, d2, sizeof(d2))); /* Compares the whole 256-byte buffers; both start out zero-initialized. */ \ | |
| 148 } while (0) | |
| 149 | |
| 150 TEST_IMPL(idna_toascii) { /* Runs uv__idna_toascii() over rejected inputs (F) and over input / expected-output pairs (T). */ | |
| 151 /* Illegal inputs. */ | |
| 152 F("\xC0\x80\xC1\x80", UV_EINVAL); /* Overlong UTF-8 sequence. */ | |
| 153 F("\xC0\x80\xC1\x80.com", UV_EINVAL); /* Overlong UTF-8 sequence. */ | |
| 154 F("", UV_EINVAL); | |
| 155 /* No conversion. */ | |
| 156 T(".", "."); | |
| 157 T(".com", ".com"); | |
| 158 T("example", "example"); | |
| 159 T("example-", "example-"); | |
| 160 T("straße.de", "xn--strae-oqa.de"); /* Unlike the four cases above, this label is non-ASCII and is converted. */ | |
| 161 /* Test cases adapted from punycode.js. Most are from RFC 3492. */ | |
| 162 T("foo.bar", "foo.bar"); | |
| 163 T("mañana.com", "xn--maana-pta.com"); | |
| 164 T("example.com.", "example.com."); | |
| 165 T("bücher.com", "xn--bcher-kva.com"); | |
| 166 T("café.com", "xn--caf-dma.com"); | |
| 167 T("café.café.com", "xn--caf-dma.xn--caf-dma.com"); | |
| 168 T("☃-⌘.com", "xn----dqo34k.com"); | |
| 169 T("퐀☃-⌘.com", "xn----dqo34kn65z.com"); | |
| 170 T("💩.la", "xn--ls8h.la"); | |
| 171 T("mañana.com", "xn--maana-pta.com"); /* Separator U+002E. The next three inputs use U+3002, U+FF0E and U+FF61, written as UTF-8 byte escapes so normalizing tools cannot fold them into duplicates; the literal is split after each escape so the escape cannot absorb the 'c' of "com". */ | |
| 172 T("mañana\xE3\x80\x82" "com", "xn--maana-pta.com"); /* U+3002 ideographic full stop. */ | |
| 173 T("mañana\xEF\xBC\x8E" "com", "xn--maana-pta.com"); /* U+FF0E fullwidth full stop. */ | |
| 174 T("mañana\xEF\xBD\xA1" "com", "xn--maana-pta.com"); /* U+FF61 halfwidth ideographic full stop. */ | |
| 175 T("ü", "xn--tda"); | |
| 176 T(".ü", ".xn--tda"); | |
| 177 T("ü.ü", "xn--tda.xn--tda"); | |
| 178 T("ü.ü.", "xn--tda.xn--tda."); | |
| 179 T("üëäö♥", "xn--4can8av2009b"); | |
| 180 T("Willst du die Blüthe des frühen, die Früchte des späteren Jahres", | |
| 181 "xn--Willst du die Blthe des frhen, " | |
| 182 "die Frchte des spteren Jahres-x9e96lkal"); | |
| 183 T("ليهمابتكلموشعربي؟", "xn--egbpdaj6bu4bxfgehfvwxn"); | |
| 184 T("他们为什么不说中文", "xn--ihqwcrb4cv8a8dqg056pqjye"); | |
| 185 T("他們爲什麽不說中文", "xn--ihqwctvzc91f659drss3x8bo0yb"); | |
| 186 T("Pročprostěnemluvíčesky", "xn--Proprostnemluvesky-uyb24dma41a"); | |
| 187 T("למההםפשוטלאמדבריםעברית", "xn--4dbcagdahymbxekheh6e0a7fei0b"); | |
| 188 T("यहलोगहिन्दीक्योंनहींबोलसकतेहैं", | |
| 189 "xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"); | |
| 190 T("なぜみんな日本語を話してくれないのか", | |
| 191 "xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"); | |
| 192 T("세계의모든사람들이한국어를이해한다면얼마나좋을까", | |
| 193 "xn--989aomsvi5e83db1d2a355cv1e0vak1d" | |
| 194 "wrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"); | |
| 195 T("почемужеонинеговорятпорусски", "xn--b1abfaaepdrnnbgefbadotcwatmq2g4l"); | |
| 196 T("PorquénopuedensimplementehablarenEspañol", | |
| 197 "xn--PorqunopuedensimplementehablarenEspaol-fmd56a"); | |
| 198 T("TạisaohọkhôngthểchỉnóitiếngViệt", | |
| 199 "xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"); | |
| 200 T("3年B組金八先生", "xn--3B-ww4c5e180e575a65lsy2b"); | |
| 201 T("安室奈美恵-with-SUPER-MONKEYS", | |
| 202 "xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"); | |
| 203 T("Hello-Another-Way-それぞれの場所", | |
| 204 "xn--Hello-Another-Way--fc4qua05auwb3674vfr0b"); | |
| 205 T("ひとつ屋根の下2", "xn--2-u9tlzr9756bt3uc0v"); | |
| 206 T("MajiでKoiする5秒前", "xn--MajiKoi5-783gue6qz075azm5e"); | |
| 207 T("パフィーdeルンバ", "xn--de-jg4avhby1noc0d"); | |
| 208 T("そのスピードで", "xn--d9juau41awczczp"); | |
| 209 T("-> $1.00 <-", "-> $1.00 <-"); | |
| 210 /* Test cases from https://unicode.org/reports/tr46/ */ | |
| 211 T("faß.de", "xn--fa-hia.de"); | |
| 212 T("βόλος.com", "xn--nxasmm1c.com"); | |
| 213 T("\xE0\xB7\x81\xE0\xB7\x8A\xE2\x80\x8D\xE0\xB6\xBB\xE0\xB7\x93" ".com", "xn--10cl1a0b660p.com"); /* U+0DC1 U+0DCA U+200D (zero width joiner) U+0DBB U+0DD3; escaped because the joiner is invisible and the expected "660p" suffix encodes it. */ | |
| 214 T("\xD9\x86\xD8\xA7\xD9\x85\xD9\x87\xE2\x80\x8C\xD8\xA7\xDB\x8C" ".com", "xn--mgba3gch31f060k.com"); /* U+0646 U+0627 U+0645 U+0647 U+200C (zero width non-joiner) U+0627 U+06CC; escaped because the non-joiner is invisible and the expected "060k" suffix encodes it. */ | |
| 215 return 0; | |
| 216 } | |
| 217 | |
| 218 #undef T | |
| 219 | |
| 220 #endif /* __MVS__ */ | |
| 221 | |
| 222 TEST_IMPL(wtf8) { /* Smoke test: measures a fixed literal with uv_wtf8_length_as_utf16() and converts it with uv_wtf8_to_utf16(); the converted output is not inspected. */ | |
| 223 static const char input[] = "ᜄȺy𐞲:𞢢𘴇𐀀'¥3̞[<i$"; | |
| 224 uint16_t buf[32]; | |
| 225 ssize_t len; | |
| 226 | |
| 227 len = uv_wtf8_length_as_utf16(input); | |
| 228 ASSERT_GT(len, 0); | |
| 229 ASSERT_LT(len, ARRAY_SIZE(buf)); /* len is checked against the element count of buf before buf is handed to the converter. */ | |
| 230 uv_wtf8_to_utf16(input, buf, len); | |
| 231 return 0; | |
| 232 } | |