|
160
|
1 /* Copyright The libuv project and contributors. All rights reserved.
|
|
|
2 *
|
|
|
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
4 * of this software and associated documentation files (the "Software"), to
|
|
|
5 * deal in the Software without restriction, including without limitation the
|
|
|
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
|
7 * sell copies of the Software, and to permit persons to whom the Software is
|
|
|
8 * furnished to do so, subject to the following conditions:
|
|
|
9 *
|
|
|
10 * The above copyright notice and this permission notice shall be included in
|
|
|
11 * all copies or substantial portions of the Software.
|
|
|
12 *
|
|
|
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
19 * IN THE SOFTWARE.
|
|
|
20 */
|
|
|
21
|
|
|
22 #include "task.h"
|
|
|
23 #define uv__malloc malloc
|
|
|
24 #include "../src/idna.c"
|
|
|
25 #include <string.h>
|
|
|
26
|
|
|
/* Feeds uv__utf8_decode1() the boundary code points of every sequence length
 * plus several kinds of malformed input. Malformed input must decode to
 * (unsigned) -1. In all cases the cursor is expected to move past the bytes
 * that were consumed.
 */
TEST_IMPL(utf8_decode1) {
  char buf[32];
  const char* const end = buf + sizeof(buf);
  const char* cur;
  int k;

  /* ASCII: NUL followed by DEL. */
  snprintf(buf, sizeof(buf), "%c\x7F", 0x00);
  cur = buf;
  ASSERT_OK(uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 1);
  ASSERT_EQ(127, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 2);

  /* Two-byte sequences: U+0080 and U+07FF. */
  snprintf(buf, sizeof(buf), "%s", "\xC2\x80\xDF\xBF");
  cur = buf;
  ASSERT_EQ(128, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 2);
  ASSERT_EQ(0x7FF, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 4);

  /* Three-byte sequences: U+0800 and U+FFFF. */
  snprintf(buf, sizeof(buf), "%s", "\xE0\xA0\x80\xEF\xBF\xBF");
  cur = buf;
  ASSERT_EQ(0x800, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 3);
  ASSERT_EQ(0xFFFF, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 6);

  /* Four-byte sequences: U+10000 and U+10FFFF. */
  snprintf(buf, sizeof(buf), "%s", "\xF0\x90\x80\x80\xF4\x8F\xBF\xBF");
  cur = buf;
  ASSERT_EQ(0x10000, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 4);
  ASSERT_EQ(0x10FFFF, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 8);

  /* Four-byte sequences > U+10FFFF; disallowed. */
  snprintf(buf, sizeof(buf), "%s", "\xF4\x90\xC0\xC0\xF7\xBF\xBF\xBF");
  cur = buf;
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 4);
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 8);

  /* Overlong two-byte encodings; disallowed. */
  snprintf(buf, sizeof(buf), "%s", "\xC0\x80\xC1\x80");
  cur = buf;
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 2);
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 4);

  /* UTF-8 encodings of surrogate code points (U+D800, U+D8FF); disallowed. */
  snprintf(buf, sizeof(buf), "%s", "\xED\xA0\x80\xED\xA3\xBF");
  cur = buf;
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 3);
  ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
  ASSERT_PTR_EQ(cur, buf + 6);

  /* Simply illegal: each of these bytes is rejected on its own, so the
   * cursor moves forward exactly one byte per call.
   */
  snprintf(buf, sizeof(buf), "%s", "\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
  cur = buf;

  for (k = 0; k < 8; k++) {
    ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&cur, end));
    ASSERT_PTR_EQ(cur, buf + k + 1);
  }

  return 0;
}
|
|
|
99
|
|
|
100 TEST_IMPL(utf8_decode1_overrun) {
|
|
|
101 const char* p;
|
|
|
102 char b[1];
|
|
|
103 char c[1];
|
|
|
104
|
|
|
105 /* Single byte. */
|
|
|
106 p = b;
|
|
|
107 b[0] = 0x7F;
|
|
|
108 ASSERT_EQ(0x7F, uv__utf8_decode1(&p, b + 1));
|
|
|
109 ASSERT_PTR_EQ(p, b + 1);
|
|
|
110
|
|
|
111 /* Multi-byte. */
|
|
|
112 p = b;
|
|
|
113 b[0] = 0xC0;
|
|
|
114 ASSERT_EQ((unsigned) -1, uv__utf8_decode1(&p, b + 1));
|
|
|
115 ASSERT_PTR_EQ(p, b + 1);
|
|
|
116
|
|
|
117 b[0] = 0x7F;
|
|
|
118 ASSERT_EQ(UV_EINVAL, uv__idna_toascii(b, b + 0, c, c + 1));
|
|
|
119 ASSERT_EQ(UV_EINVAL, uv__idna_toascii(b, b + 1, c, c + 1));
|
|
|
120
|
|
|
121 return 0;
|
|
|
122 }
|
|
|
123
|
|
|
124 /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */
|
|
|
125 #ifndef __MVS__
|
|
|
126
|
|
|
/* F(input, err): asserts that converting the string literal `input` with
 * uv__idna_toascii() into a 256-byte buffer returns `err`. The terminating
 * NUL of the literal is not part of the input range.
 */
#define F(input, err)                                                         \
  do {                                                                        \
    static const char src[] = "" input "";                                    \
    const char* const src_end = src + sizeof(src) - 1;                        \
    char out[256] = {0};                                                      \
    ASSERT_EQ(err, uv__idna_toascii(src, src_end, out, out + sizeof(out)));   \
  } while (0)
|
|
|
133
|
|
|
/* T(input, expected): asserts that uv__idna_toascii() turns the string
 * literal `input` into `expected`, with a return value equal to
 * sizeof(expected). It then converts that output a second time and asserts
 * the result is unchanged, both against `expected` and across the full
 * zero-initialized buffers.
 */
#define T(input, expected)                                                    \
  do {                                                                        \
    static const char src[] = "" input "";                                    \
    char first[256] = {0};                                                    \
    char second[256] = {0};                                                   \
    long len;                                                                 \
    len = uv__idna_toascii(src,                                               \
                           src + sizeof(src) - 1,                             \
                           first,                                             \
                           first + sizeof(first));                            \
    ASSERT_EQ(len, sizeof(expected));                                         \
    ASSERT_OK(memcmp(first, expected, len));                                  \
    /* Sanity check: encoding twice should not change the output. */          \
    len = uv__idna_toascii(first,                                             \
                           first + strlen(first),                             \
                           second,                                            \
                           second + sizeof(second));                          \
    ASSERT_EQ(len, sizeof(expected));                                         \
    ASSERT_OK(memcmp(second, expected, len));                                 \
    ASSERT_OK(memcmp(first, second, sizeof(second)));                         \
  } while (0)
|
|
|
149
|
|
|
150 TEST_IMPL(idna_toascii) {
|
|
|
151 /* Illegal inputs. */
|
|
|
152 F("\xC0\x80\xC1\x80", UV_EINVAL); /* Overlong UTF-8 sequence. */
|
|
|
153 F("\xC0\x80\xC1\x80.com", UV_EINVAL); /* Overlong UTF-8 sequence. */
|
|
|
154 F("", UV_EINVAL);
|
|
|
155 /* No conversion. */
|
|
|
156 T(".", ".");
|
|
|
157 T(".com", ".com");
|
|
|
158 T("example", "example");
|
|
|
159 T("example-", "example-");
|
|
|
160 T("straße.de", "xn--strae-oqa.de");
|
|
|
161 /* Test cases adapted from punycode.js. Most are from RFC 3492. */
|
|
|
162 T("foo.bar", "foo.bar");
|
|
|
163 T("mañana.com", "xn--maana-pta.com");
|
|
|
164 T("example.com.", "example.com.");
|
|
|
165 T("bücher.com", "xn--bcher-kva.com");
|
|
|
166 T("café.com", "xn--caf-dma.com");
|
|
|
167 T("café.café.com", "xn--caf-dma.xn--caf-dma.com");
|
|
|
168 T("☃-⌘.com", "xn----dqo34k.com");
|
|
|
169 T("퐀☃-⌘.com", "xn----dqo34kn65z.com");
|
|
|
170 T("💩.la", "xn--ls8h.la");
|
|
|
171 T("mañana.com", "xn--maana-pta.com");
|
|
|
172 T("mañana。com", "xn--maana-pta.com");
|
|
|
173 T("mañana.com", "xn--maana-pta.com");
|
|
|
174 T("mañana。com", "xn--maana-pta.com");
|
|
|
175 T("ü", "xn--tda");
|
|
|
176 T(".ü", ".xn--tda");
|
|
|
177 T("ü.ü", "xn--tda.xn--tda");
|
|
|
178 T("ü.ü.", "xn--tda.xn--tda.");
|
|
|
179 T("üëäö♥", "xn--4can8av2009b");
|
|
|
180 T("Willst du die Blüthe des frühen, die Früchte des späteren Jahres",
|
|
|
181 "xn--Willst du die Blthe des frhen, "
|
|
|
182 "die Frchte des spteren Jahres-x9e96lkal");
|
|
|
183 T("ليهمابتكلموشعربي؟", "xn--egbpdaj6bu4bxfgehfvwxn");
|
|
|
184 T("他们为什么不说中文", "xn--ihqwcrb4cv8a8dqg056pqjye");
|
|
|
185 T("他們爲什麽不說中文", "xn--ihqwctvzc91f659drss3x8bo0yb");
|
|
|
186 T("Pročprostěnemluvíčesky", "xn--Proprostnemluvesky-uyb24dma41a");
|
|
|
187 T("למההםפשוטלאמדבריםעברית", "xn--4dbcagdahymbxekheh6e0a7fei0b");
|
|
|
188 T("यहलोगहिन्दीक्योंनहींबोलसकतेहैं",
|
|
|
189 "xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd");
|
|
|
190 T("なぜみんな日本語を話してくれないのか",
|
|
|
191 "xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa");
|
|
|
192 T("세계의모든사람들이한국어를이해한다면얼마나좋을까",
|
|
|
193 "xn--989aomsvi5e83db1d2a355cv1e0vak1d"
|
|
|
194 "wrv93d5xbh15a0dt30a5jpsd879ccm6fea98c");
|
|
|
195 T("почемужеонинеговорятпорусски", "xn--b1abfaaepdrnnbgefbadotcwatmq2g4l");
|
|
|
196 T("PorquénopuedensimplementehablarenEspañol",
|
|
|
197 "xn--PorqunopuedensimplementehablarenEspaol-fmd56a");
|
|
|
198 T("TạisaohọkhôngthểchỉnóitiếngViệt",
|
|
|
199 "xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g");
|
|
|
200 T("3年B組金八先生", "xn--3B-ww4c5e180e575a65lsy2b");
|
|
|
201 T("安室奈美恵-with-SUPER-MONKEYS",
|
|
|
202 "xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n");
|
|
|
203 T("Hello-Another-Way-それぞれの場所",
|
|
|
204 "xn--Hello-Another-Way--fc4qua05auwb3674vfr0b");
|
|
|
205 T("ひとつ屋根の下2", "xn--2-u9tlzr9756bt3uc0v");
|
|
|
206 T("MajiでKoiする5秒前", "xn--MajiKoi5-783gue6qz075azm5e");
|
|
|
207 T("パフィーdeルンバ", "xn--de-jg4avhby1noc0d");
|
|
|
208 T("そのスピードで", "xn--d9juau41awczczp");
|
|
|
209 T("-> $1.00 <-", "-> $1.00 <-");
|
|
|
210 /* Test cases from https://unicode.org/reports/tr46/ */
|
|
|
211 T("faß.de", "xn--fa-hia.de");
|
|
|
212 T("βόλος.com", "xn--nxasmm1c.com");
|
|
|
213 T("ශ්රී.com", "xn--10cl1a0b660p.com");
|
|
|
214 T("نامهای.com", "xn--mgba3gch31f060k.com");
|
|
|
215 return 0;
|
|
|
216 }
|
|
|
217
|
|
|
218 #undef T
|
|
|
219
|
|
|
220 #endif /* __MVS__ */
|
|
|
221
|
|
|
222 TEST_IMPL(wtf8) {
|
|
|
223 static const char input[] = "ᜄȺy𐞲:𞢢𘴇𐀀'¥3̞[<i$";
|
|
|
224 uint16_t buf[32];
|
|
|
225 ssize_t len;
|
|
|
226
|
|
|
227 len = uv_wtf8_length_as_utf16(input);
|
|
|
228 ASSERT_GT(len, 0);
|
|
|
229 ASSERT_LT(len, ARRAY_SIZE(buf));
|
|
|
230 uv_wtf8_to_utf16(input, buf, len);
|
|
|
231 return 0;
|
|
|
232 }
|