GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
// Copyright Joyent, Inc. and other Node contributors. |
||
2 |
// |
||
3 |
// Permission is hereby granted, free of charge, to any person obtaining a |
||
4 |
// copy of this software and associated documentation files (the |
||
5 |
// "Software"), to deal in the Software without restriction, including |
||
6 |
// without limitation the rights to use, copy, modify, merge, publish, |
||
7 |
// distribute, sublicense, and/or sell copies of the Software, and to permit |
||
8 |
// persons to whom the Software is furnished to do so, subject to the |
||
9 |
// following conditions: |
||
10 |
// |
||
11 |
// The above copyright notice and this permission notice shall be included |
||
12 |
// in all copies or substantial portions of the Software. |
||
13 |
// |
||
14 |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
15 |
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
16 |
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
||
17 |
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
||
18 |
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
19 |
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
20 |
// USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
21 |
|||
22 |
/* |
||
23 |
* notes: by srl295 |
||
24 |
* - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data |
||
25 |
* ( stubdata/libicudata.a ) containing nothing, no data, and it's also |
||
26 |
* linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT |
||
27 |
* macro names. That's the "english+root" data. |
||
28 |
* |
||
29 |
* If icu_data_path is non-null, the user has provided a path and we assume |
||
30 |
* it goes somewhere useful. We set that path in ICU, and exit. |
||
31 |
* If icu_data_path is null, they haven't set a path and we want the |
||
32 |
* "english+root" data. We call |
||
33 |
* udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...) |
||
34 |
* to load up the english+root data. |
||
35 |
* |
||
36 |
* - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full |
||
37 |
* data. All of the variables and command line options for changing data at |
||
38 |
* runtime are disabled, as they wouldn't fully override the internal data. |
||
39 |
* See: http://bugs.icu-project.org/trac/ticket/10924 |
||
40 |
*/ |
||
41 |
|||
42 |
|||
43 |
#include "node_i18n.h" |
||
44 |
|||
45 |
#if defined(NODE_HAVE_I18N_SUPPORT) |
||
46 |
|||
47 |
#include "base_object-inl.h" |
||
48 |
#include "node.h" |
||
49 |
#include "node_buffer.h" |
||
50 |
#include "node_errors.h" |
||
51 |
#include "node_internals.h" |
||
52 |
#include "util-inl.h" |
||
53 |
#include "v8.h" |
||
54 |
|||
55 |
#include <unicode/utypes.h> |
||
56 |
#include <unicode/putil.h> |
||
57 |
#include <unicode/uchar.h> |
||
58 |
#include <unicode/uclean.h> |
||
59 |
#include <unicode/udata.h> |
||
60 |
#include <unicode/uidna.h> |
||
61 |
#include <unicode/ucnv.h> |
||
62 |
#include <unicode/utf8.h> |
||
63 |
#include <unicode/utf16.h> |
||
64 |
#include <unicode/timezone.h> |
||
65 |
#include <unicode/ulocdata.h> |
||
66 |
#include <unicode/uvernum.h> |
||
67 |
#include <unicode/uversion.h> |
||
68 |
#include <unicode/ustring.h> |
||
69 |
|||
70 |
#ifdef NODE_HAVE_SMALL_ICU |
||
71 |
/* if this is defined, we have a 'secondary' entry point. |
||
72 |
compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */ |
||
73 |
#define SMALL_ICUDATA_ENTRY_POINT \ |
||
74 |
SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME) |
||
75 |
#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff) |
||
76 |
#ifndef U_LIB_SUFFIX_C_NAME |
||
77 |
#define SMALL_DEF(major, suff) icusmdt##major##_dat |
||
78 |
#else |
||
79 |
#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat |
||
80 |
#endif |
||
81 |
|||
82 |
extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[]; |
||
83 |
#endif |
||
84 |
|||
85 |
namespace node { |
||
86 |
|||
87 |
using v8::Context; |
||
88 |
using v8::FunctionCallbackInfo; |
||
89 |
using v8::HandleScope; |
||
90 |
using v8::Int32; |
||
91 |
using v8::Isolate; |
||
92 |
using v8::Local; |
||
93 |
using v8::MaybeLocal; |
||
94 |
using v8::NewStringType; |
||
95 |
using v8::Object; |
||
96 |
using v8::ObjectTemplate; |
||
97 |
using v8::String; |
||
98 |
using v8::Value; |
||
99 |
|||
100 |
namespace i18n { |
||
101 |
namespace { |
||
102 |
|||
103 |
template <typename T> |
||
104 |
534 |
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) { |
|
105 |
534 |
MaybeLocal<Object> ret = Buffer::New(env, buf); |
|
106 |
✗✓✗✓ |
534 |
if (ret.IsEmpty()) |
107 |
return ret; |
||
108 |
|||
109 |
static_assert(sizeof(T) == 1 || sizeof(T) == 2, |
||
110 |
"Currently only one- or two-byte buffers are supported"); |
||
111 |
✗✓✗✓ ✗✓ |
534 |
if (sizeof(T) > 1 && IsBigEndian()) { |
112 |
SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf); |
||
113 |
SwapBytes16(retbuf_data, retbuf_length); |
||
114 |
} |
||
115 |
|||
116 |
534 |
return ret; |
|
117 |
} |
||
118 |
|||
119 |
struct Converter { |
||
120 |
8 |
explicit Converter(const char* name, const char* sub = nullptr) |
|
121 |
8 |
: conv(nullptr) { |
|
122 |
8 |
UErrorCode status = U_ZERO_ERROR; |
|
123 |
8 |
conv = ucnv_open(name, &status); |
|
124 |
✗✓ | 8 |
CHECK(U_SUCCESS(status)); |
125 |
✓✓ | 8 |
if (sub != nullptr) { |
126 |
2 |
ucnv_setSubstChars(conv, sub, strlen(sub), &status); |
|
127 |
} |
||
128 |
8 |
} |
|
129 |
|||
130 |
205 |
explicit Converter(UConverter* converter, |
|
131 |
205 |
const char* sub = nullptr) : conv(converter) { |
|
132 |
✗✓ | 205 |
CHECK_NOT_NULL(conv); |
133 |
205 |
UErrorCode status = U_ZERO_ERROR; |
|
134 |
✗✓ | 205 |
if (sub != nullptr) { |
135 |
ucnv_setSubstChars(conv, sub, strlen(sub), &status); |
||
136 |
} |
||
137 |
205 |
} |
|
138 |
|||
139 |
213 |
~Converter() { |
|
140 |
213 |
ucnv_close(conv); |
|
141 |
213 |
} |
|
142 |
|||
143 |
UConverter* conv; |
||
144 |
}; |
||
145 |
|||
146 |
class ConverterObject : public BaseObject, Converter { |
||
147 |
public: |
||
148 |
enum ConverterFlags { |
||
149 |
CONVERTER_FLAGS_FLUSH = 0x1, |
||
150 |
CONVERTER_FLAGS_FATAL = 0x2, |
||
151 |
CONVERTER_FLAGS_IGNORE_BOM = 0x4 |
||
152 |
}; |
||
153 |
|||
154 |
✗✓ | 410 |
~ConverterObject() override = default; |
155 |
|||
156 |
2 |
static void Has(const FunctionCallbackInfo<Value>& args) { |
|
157 |
2 |
Environment* env = Environment::GetCurrent(args); |
|
158 |
2 |
HandleScope scope(env->isolate()); |
|
159 |
|||
160 |
✗✓ | 2 |
CHECK_GE(args.Length(), 1); |
161 |
4 |
Utf8Value label(env->isolate(), args[0]); |
|
162 |
|||
163 |
2 |
UErrorCode status = U_ZERO_ERROR; |
|
164 |
2 |
UConverter* conv = ucnv_open(*label, &status); |
|
165 |
6 |
args.GetReturnValue().Set(!!U_SUCCESS(status)); |
|
166 |
4 |
ucnv_close(conv); |
|
167 |
2 |
} |
|
168 |
|||
169 |
205 |
static void Create(const FunctionCallbackInfo<Value>& args) { |
|
170 |
205 |
Environment* env = Environment::GetCurrent(args); |
|
171 |
205 |
HandleScope scope(env->isolate()); |
|
172 |
|||
173 |
205 |
Local<ObjectTemplate> t = ObjectTemplate::New(env->isolate()); |
|
174 |
205 |
t->SetInternalFieldCount(1); |
|
175 |
Local<Object> obj; |
||
176 |
✗✓ | 615 |
if (!t->NewInstance(env->context()).ToLocal(&obj)) return; |
177 |
|||
178 |
✗✓ | 205 |
CHECK_GE(args.Length(), 2); |
179 |
✓✗ | 410 |
Utf8Value label(env->isolate(), args[0]); |
180 |
820 |
int flags = args[1]->Uint32Value(env->context()).ToChecked(); |
|
181 |
bool fatal = |
||
182 |
205 |
(flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL; |
|
183 |
bool ignoreBOM = |
||
184 |
205 |
(flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM; |
|
185 |
|||
186 |
205 |
UErrorCode status = U_ZERO_ERROR; |
|
187 |
205 |
UConverter* conv = ucnv_open(*label, &status); |
|
188 |
✗✓ | 205 |
if (U_FAILURE(status)) |
189 |
return; |
||
190 |
|||
191 |
✓✓ | 205 |
if (fatal) { |
192 |
94 |
status = U_ZERO_ERROR; |
|
193 |
ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, |
||
194 |
94 |
nullptr, nullptr, nullptr, &status); |
|
195 |
} |
||
196 |
|||
197 |
205 |
new ConverterObject(env, obj, conv, ignoreBOM); |
|
198 |
✓✗ | 615 |
args.GetReturnValue().Set(obj); |
199 |
} |
||
200 |
|||
201 |
616 |
static void Decode(const FunctionCallbackInfo<Value>& args) { |
|
202 |
616 |
Environment* env = Environment::GetCurrent(args); |
|
203 |
|||
204 |
✗✓ | 616 |
CHECK_GE(args.Length(), 3); // Converter, Buffer, Flags |
205 |
|||
206 |
ConverterObject* converter; |
||
207 |
✗✓ | 1756 |
ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>()); |
208 |
616 |
ArrayBufferViewContents<char> input(args[1]); |
|
209 |
2464 |
int flags = args[2]->Uint32Value(env->context()).ToChecked(); |
|
210 |
|||
211 |
616 |
UErrorCode status = U_ZERO_ERROR; |
|
212 |
616 |
MaybeStackBuffer<UChar> result; |
|
213 |
MaybeLocal<Object> ret; |
||
214 |
616 |
size_t limit = ucnv_getMinCharSize(converter->conv) * input.length(); |
|
215 |
✓✓ | 616 |
if (limit > 0) |
216 |
590 |
result.AllocateSufficientStorage(limit); |
|
217 |
|||
218 |
616 |
UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH; |
|
219 |
616 |
OnScopeLeave cleanup([&]() { |
|
220 |
✓✓ | 616 |
if (flush) { |
221 |
// Reset the converter state. |
||
222 |
198 |
converter->bomSeen_ = false; |
|
223 |
198 |
ucnv_reset(converter->conv); |
|
224 |
} |
||
225 |
✓✓ | 1324 |
}); |
226 |
|||
227 |
616 |
const char* source = input.data(); |
|
228 |
616 |
size_t source_length = input.length(); |
|
229 |
|||
230 |
✓✗✓✓ ✓✓ |
616 |
if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) { |
231 |
187 |
int32_t bomOffset = 0; |
|
232 |
187 |
ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status); |
|
233 |
187 |
source += bomOffset; |
|
234 |
187 |
source_length -= bomOffset; |
|
235 |
187 |
converter->bomSeen_ = true; |
|
236 |
} |
||
237 |
|||
238 |
616 |
UChar* target = *result; |
|
239 |
ucnv_toUnicode(converter->conv, |
||
240 |
616 |
&target, target + (limit * sizeof(UChar)), |
|
241 |
&source, source + source_length, |
||
242 |
1232 |
nullptr, flush, &status); |
|
243 |
|||
244 |
✓✓ | 616 |
if (U_SUCCESS(status)) { |
245 |
✓✓ | 524 |
if (limit > 0) |
246 |
498 |
result.SetLength(target - &result[0]); |
|
247 |
524 |
ret = ToBufferEndian(env, &result); |
|
248 |
1048 |
args.GetReturnValue().Set(ret.ToLocalChecked()); |
|
249 |
524 |
return; |
|
250 |
} |
||
251 |
|||
252 |
✓✓ | 368 |
args.GetReturnValue().Set(status); |
253 |
} |
||
254 |
|||
255 |
SET_NO_MEMORY_INFO() |
||
256 |
SET_MEMORY_INFO_NAME(ConverterObject) |
||
257 |
SET_SELF_SIZE(ConverterObject) |
||
258 |
|||
259 |
protected: |
||
260 |
205 |
ConverterObject(Environment* env, |
|
261 |
Local<Object> wrap, |
||
262 |
UConverter* converter, |
||
263 |
bool ignoreBOM, |
||
264 |
const char* sub = nullptr) : |
||
265 |
BaseObject(env, wrap), |
||
266 |
Converter(converter, sub), |
||
267 |
205 |
ignoreBOM_(ignoreBOM) { |
|
268 |
205 |
MakeWeak(); |
|
269 |
|||
270 |
✓✗ | 205 |
switch (ucnv_getType(converter)) { |
271 |
case UCNV_UTF8: |
||
272 |
case UCNV_UTF16_BigEndian: |
||
273 |
case UCNV_UTF16_LittleEndian: |
||
274 |
205 |
unicode_ = true; |
|
275 |
205 |
break; |
|
276 |
default: |
||
277 |
unicode_ = false; |
||
278 |
} |
||
279 |
205 |
} |
|
280 |
|||
281 |
private: |
||
282 |
bool unicode_ = false; // True if this is a Unicode converter |
||
283 |
bool ignoreBOM_ = false; // True if the BOM should be ignored on Unicode |
||
284 |
bool bomSeen_ = false; // True if the BOM has been seen |
||
285 |
}; |
||
286 |
|||
287 |
// One-Shot Converters |
||
288 |
|||
289 |
2 |
// Copies raw UTF-16 bytes from `data` into `dest`, which is first grown to
// hold `length_in_chars` code units. When IsBigEndian() reports true the
// copied bytes are swapped in place with SwapBytes16().
//
// `length` is the byte count of `data`. At most
// `length_in_chars * sizeof(UChar)` bytes are copied: callers derive
// `length_in_chars` as `length / sizeof(UChar)`, so an odd `length` would
// otherwise write one byte more than was requested from `dest`.
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
                      const char* data,
                      const size_t length,
                      const size_t length_in_chars) {
  dest->AllocateSufficientStorage(length_in_chars);
  char* dst = reinterpret_cast<char*>(**dest);

  const size_t capacity_in_bytes = length_in_chars * sizeof(UChar);
  const size_t bytes_to_copy =
      length < capacity_in_bytes ? length : capacity_in_bytes;

  memcpy(dst, data, bytes_to_copy);
  if (IsBigEndian()) {
    SwapBytes16(dst, bytes_to_copy);
  }
}
|
300 |
|||
301 |
// Common signature shared by all one-shot transcoding helpers below.
using TranscodeFunc = MaybeLocal<Object> (*)(Environment* env,
                                             const char* fromEncoding,
                                             const char* toEncoding,
                                             const char* source,
                                             const size_t source_length,
                                             UErrorCode* status);
||
307 |
|||
308 |
2 |
// Generic one-shot conversion of `source` from `fromEncoding` to `toEncoding`
// through ucnv_convertEx(). "?" is requested as the substitution sequence of
// the target converter. Returns a Buffer with the converted bytes, or an
// empty MaybeLocal with the ICU error left in `*status`.
MaybeLocal<Object> Transcode(Environment* env,
                             const char* fromEncoding,
                             const char* toEncoding,
                             const char* source,
                             const size_t source_length,
                             UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  MaybeStackBuffer<char> result;
  Converter to(toEncoding, "?");
  Converter from(fromEncoding);
  // Keep the size computation in size_t: narrowing the product to 32 bits
  // could under-size the output for very large inputs and make the
  // conversion fail with a spurious buffer overflow error.
  const size_t limit = source_length * ucnv_getMaxCharSize(to.conv);
  result.AllocateSufficientStorage(limit);
  char* target = *result;
  ucnv_convertEx(to.conv, from.conv, &target, target + limit,
                 &source, source + source_length, nullptr, nullptr,
                 nullptr, nullptr, true, true, status);
  if (U_SUCCESS(*status)) {
    result.SetLength(target - &result[0]);
    ret = ToBufferEndian(env, &result);
  }
  return ret;
}
||
331 |
|||
332 |
4 |
// One-shot conversion of `source` (encoded as `fromEncoding`) to UTF-16 via
// ucnv_toUChars(). `toEncoding` is unused. Returns a Buffer with the UTF-16
// data, or an empty MaybeLocal with the ICU error left in `*status`.
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
                                   const char* fromEncoding,
                                   const char* toEncoding,
                                   const char* source,
                                   const size_t source_length,
                                   UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  // One UChar per input byte is requested from the buffer.
  MaybeStackBuffer<UChar> destbuf(source_length);
  Converter from(fromEncoding);
  // The capacity reported to ICU is counted in UChars and must not exceed
  // what was requested above. Reporting `source_length * sizeof(UChar)`
  // would let ICU write (e.g. its NUL terminator) past the end.
  const int32_t written = ucnv_toUChars(from.conv, *destbuf, source_length,
                                        source, source_length, status);
  if (U_SUCCESS(*status)) {
    // Expose exactly the code units ICU produced.
    destbuf.SetLength(written);
    ret = ToBufferEndian(env, &destbuf);
  }
  return ret;
}
||
349 |
|||
350 |
// One-shot conversion of UTF-16 bytes in `source` to `toEncoding` via
// ucnv_fromUChars(), with "?" requested as the substitution sequence.
// `fromEncoding` is unused. The output buffer holds one byte per input code
// unit. Returns an empty MaybeLocal, with the ICU error in `*status`, when
// the conversion fails.
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
                                     const char* fromEncoding,
                                     const char* toEncoding,
                                     const char* source,
                                     const size_t source_length,
                                     UErrorCode* status) {
  *status = U_ZERO_ERROR;
  const size_t char_count = source_length / sizeof(UChar);

  MaybeStackBuffer<UChar> utf16_input;
  Converter to(toEncoding, "?");
  CopySourceBuffer(&utf16_input, source, source_length, char_count);

  MaybeStackBuffer<char> encoded(char_count);
  const uint32_t written = ucnv_fromUChars(to.conv, *encoded, char_count,
                                           *utf16_input, char_count, status);
  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  encoded.SetLength(written);
  return ToBufferEndian(env, &encoded);
}
||
371 |
|||
372 |
2 |
// One-shot UTF-8 to UTF-16 conversion via u_strFromUTF8(). The first attempt
// uses the buffer's initial capacity; if ICU reports a buffer overflow the
// buffer is grown to the length ICU asked for and the conversion is repeated
// once. `fromEncoding` and `toEncoding` are unused. Returns an empty
// MaybeLocal, with the ICU error in `*status`, on failure.
MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  MaybeStackBuffer<UChar> destbuf;
  int32_t utf16_length;

  *status = U_ZERO_ERROR;
  u_strFromUTF8(*destbuf, destbuf.capacity(), &utf16_length,
                source, source_length, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // Second and last attempt, now with the size ICU reported.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(utf16_length);
    u_strFromUTF8(*destbuf, utf16_length, &utf16_length,
                  source, source_length, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(utf16_length);
  return ToBufferEndian(env, &destbuf);
}
||
399 |
|||
400 |
2 |
// One-shot UTF-16 to UTF-8 conversion via u_strToUTF8(). The input bytes are
// first copied into a UChar buffer with CopySourceBuffer(). The first attempt
// uses the output buffer's initial capacity; on a buffer overflow the output
// is grown to the length ICU asked for and the conversion is repeated once.
// `fromEncoding` and `toEncoding` are unused. Returns an empty MaybeLocal,
// with the ICU error in `*status`, on failure.
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  const size_t char_count = source_length / sizeof(UChar);

  MaybeStackBuffer<UChar> utf16_input;
  MaybeStackBuffer<char> destbuf;
  CopySourceBuffer(&utf16_input, source, source_length, char_count);

  int32_t utf8_length;
  u_strToUTF8(*destbuf, destbuf.capacity(), &utf8_length,
              *utf16_input, char_count, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // Second and last attempt, now with the size ICU reported.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(utf8_length);
    u_strToUTF8(*destbuf, utf8_length, &utf8_length, *utf16_input,
                char_count, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(utf8_length);
  return ToBufferEndian(env, &destbuf);
}
||
430 |
|||
431 |
20 |
const char* EncodingName(const enum encoding encoding) { |
|
432 |
✓✓✓✓ ✗ |
20 |
switch (encoding) { |
433 |
2 |
case ASCII: return "us-ascii"; |
|
434 |
4 |
case LATIN1: return "iso8859-1"; |
|
435 |
8 |
case UCS2: return "utf16le"; |
|
436 |
6 |
case UTF8: return "utf-8"; |
|
437 |
default: return nullptr; |
||
438 |
} |
||
439 |
} |
||
440 |
|||
441 |
22 |
bool SupportedEncoding(const enum encoding encoding) { |
|
442 |
✓✓ | 22 |
switch (encoding) { |
443 |
case ASCII: |
||
444 |
case LATIN1: |
||
445 |
case UCS2: |
||
446 |
20 |
case UTF8: return true; |
|
447 |
2 |
default: return false; |
|
448 |
} |
||
449 |
} |
||
450 |
|||
451 |
12 |
void Transcode(const FunctionCallbackInfo<Value>&args) { |
|
452 |
12 |
Environment* env = Environment::GetCurrent(args); |
|
453 |
12 |
Isolate* isolate = env->isolate(); |
|
454 |
12 |
UErrorCode status = U_ZERO_ERROR; |
|
455 |
MaybeLocal<Object> result; |
||
456 |
|||
457 |
12 |
ArrayBufferViewContents<char> input(args[0]); |
|
458 |
12 |
const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER); |
|
459 |
12 |
const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER); |
|
460 |
|||
461 |
✓✓✓✗ ✓✓ |
12 |
if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) { |
462 |
10 |
TranscodeFunc tfn = &Transcode; |
|
463 |
✓✓✓✗ |
10 |
switch (fromEncoding) { |
464 |
case ASCII: |
||
465 |
case LATIN1: |
||
466 |
✓✗ | 4 |
if (toEncoding == UCS2) |
467 |
4 |
tfn = &TranscodeToUcs2; |
|
468 |
4 |
break; |
|
469 |
case UTF8: |
||
470 |
✓✓ | 4 |
if (toEncoding == UCS2) |
471 |
2 |
tfn = &TranscodeUcs2FromUtf8; |
|
472 |
4 |
break; |
|
473 |
case UCS2: |
||
474 |
✗✓✗ | 2 |
switch (toEncoding) { |
475 |
case UCS2: |
||
476 |
tfn = &Transcode; |
||
477 |
break; |
||
478 |
case UTF8: |
||
479 |
2 |
tfn = &TranscodeUtf8FromUcs2; |
|
480 |
2 |
break; |
|
481 |
default: |
||
482 |
tfn = &TranscodeFromUcs2; |
||
483 |
} |
||
484 |
2 |
break; |
|
485 |
default: |
||
486 |
// This should not happen because of the SupportedEncoding checks |
||
487 |
ABORT(); |
||
488 |
} |
||
489 |
|||
490 |
result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding), |
||
491 |
10 |
input.data(), input.length(), &status); |
|
492 |
} else { |
||
493 |
2 |
status = U_ILLEGAL_ARGUMENT_ERROR; |
|
494 |
} |
||
495 |
|||
496 |
✓✓ | 12 |
if (result.IsEmpty()) |
497 |
6 |
return args.GetReturnValue().Set(status); |
|
498 |
|||
499 |
20 |
return args.GetReturnValue().Set(result.ToLocalChecked()); |
|
500 |
} |
||
501 |
|||
502 |
2 |
void ICUErrorName(const FunctionCallbackInfo<Value>& args) { |
|
503 |
2 |
Environment* env = Environment::GetCurrent(args); |
|
504 |
✗✓ | 4 |
CHECK(args[0]->IsInt32()); |
505 |
6 |
UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value()); |
|
506 |
args.GetReturnValue().Set( |
||
507 |
String::NewFromUtf8(env->isolate(), |
||
508 |
u_errorName(status), |
||
509 |
6 |
NewStringType::kNormal).ToLocalChecked()); |
|
510 |
2 |
} |
|
511 |
|||
512 |
} // anonymous namespace |
||
513 |
|||
514 |
4981 |
bool InitializeICUDirectory(const std::string& path) { |
|
515 |
4981 |
UErrorCode status = U_ZERO_ERROR; |
|
516 |
✓✓ | 4981 |
if (path.empty()) { |
517 |
#ifdef NODE_HAVE_SMALL_ICU |
||
518 |
// install the 'small' data. |
||
519 |
4979 |
udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status); |
|
520 |
#else // !NODE_HAVE_SMALL_ICU |
||
521 |
// no small data, so nothing to do. |
||
522 |
#endif // !NODE_HAVE_SMALL_ICU |
||
523 |
} else { |
||
524 |
2 |
u_setDataDirectory(path.c_str()); |
|
525 |
2 |
u_init(&status); |
|
526 |
} |
||
527 |
4981 |
return status == U_ZERO_ERROR; |
|
528 |
} |
||
529 |
|||
530 |
382 |
// Runs UTS #46 ToUnicode (nontransitional) on the UTF-8 `input` of `length`
// bytes and stores the UTF-8 result in `buf`. Returns the result length, or
// -1 (with `buf` set to length 0) when ICU reports a failure status.
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
                  const char* input,
                  size_t length) {
  UErrorCode status = U_ZERO_ERROR;
  UIDNA* uidna = uidna_openUTS46(UIDNA_NONTRANSITIONAL_TO_UNICODE, &status);
  if (U_FAILURE(status))
    return -1;

  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  auto convert = [&]() {
    return uidna_nameToUnicodeUTF8(uidna,
                                   input, length,
                                   **buf, buf->capacity(),
                                   &info,
                                   &status);
  };

  int32_t len = convert();
  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // Grow to the size ICU asked for and try once more.
    status = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(len);
    len = convert();
  }

  // info.errors is ignored, unlike in ToASCII: UTS #46 ToUnicode always
  // produces a Unicode string, regardless of whether an error occurred.

  if (U_SUCCESS(status)) {
    buf->SetLength(len);
  } else {
    len = -1;
    buf->SetLength(0);
  }

  uidna_close(uidna);
  return len;
}
||
572 |
|||
573 |
13776 |
// Runs UTS #46 ToASCII (nontransitional, CheckBidi and CheckJoiners enabled)
// on the UTF-8 `input` of `length` bytes and stores the result in `buf`.
// IDNA_STRICT additionally enables STD3 rules and keeps the DNS length
// errors; IDNA_LENIENT ignores every error recorded in UIDNAInfo.
// Returns the result length, or -1 (with `buf` set to length 0) on failure.
int32_t ToASCII(MaybeStackBuffer<char>* buf,
                const char* input,
                size_t length,
                enum idna_mode mode) {
  const bool strict = (mode == IDNA_STRICT);

  UErrorCode status = U_ZERO_ERROR;
  uint32_t options =                  // CheckHyphens = false; handled later
    UIDNA_CHECK_BIDI |                // CheckBidi = true
    UIDNA_CHECK_CONTEXTJ |            // CheckJoiners = true
    UIDNA_NONTRANSITIONAL_TO_ASCII;   // Nontransitional_Processing
  if (strict) {
    options |= UIDNA_USE_STD3_RULES;  // UseSTD3ASCIIRules = beStrict
                                      // VerifyDnsLength = beStrict;
                                      //   handled later
  }

  UIDNA* uidna = uidna_openUTS46(options, &status);
  if (U_FAILURE(status))
    return -1;

  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  auto convert = [&]() {
    return uidna_nameToASCII_UTF8(uidna,
                                  input, length,
                                  **buf, buf->capacity(),
                                  &info,
                                  &status);
  };

  int32_t len = convert();
  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // Grow to the size ICU asked for and try once more.
    status = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(len);
    len = convert();
  }

  // UTS #46 makes several ToASCII error conditions configurable, and the
  // WHATWG URL Standard switches some of them off for real-world
  // compatibility. ICU4C's IDNA module cannot disable them through `options`,
  // so the corresponding error bits are cleared here, before deciding
  // whether this function reports an error.

  // CheckHyphens = false
  // (Specified in the current UTS #46 draft rev. 18.)
  // Refs:
  // - https://github.com/whatwg/url/issues/53
  // - https://github.com/whatwg/url/pull/309
  // - http://www.unicode.org/review/pri317/
  // - http://www.unicode.org/reports/tr46/tr46-18.html
  // - https://www.icann.org/news/announcement-2000-01-07-en
  info.errors &= ~(UIDNA_ERROR_HYPHEN_3_4 |
                   UIDNA_ERROR_LEADING_HYPHEN |
                   UIDNA_ERROR_TRAILING_HYPHEN);

  if (!strict) {
    // VerifyDnsLength = beStrict
    info.errors &= ~(UIDNA_ERROR_EMPTY_LABEL |
                     UIDNA_ERROR_LABEL_TOO_LONG |
                     UIDNA_ERROR_DOMAIN_NAME_TOO_LONG);
  }

  const bool rejected =
      U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0);
  if (rejected) {
    len = -1;
    buf->SetLength(0);
  } else {
    buf->SetLength(len);
  }

  uidna_close(uidna);
  return len;
}
||
647 |
|||
648 |
189 |
static void ToUnicode(const FunctionCallbackInfo<Value>& args) { |
|
649 |
189 |
Environment* env = Environment::GetCurrent(args); |
|
650 |
✗✓ | 189 |
CHECK_GE(args.Length(), 1); |
651 |
✗✓ | 567 |
CHECK(args[0]->IsString()); |
652 |
189 |
Utf8Value val(env->isolate(), args[0]); |
|
653 |
|||
654 |
✓✗ | 378 |
MaybeStackBuffer<char> buf; |
655 |
189 |
int32_t len = ToUnicode(&buf, *val, val.length()); |
|
656 |
|||
657 |
✗✓ | 189 |
if (len < 0) { |
658 |
189 |
return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode"); |
|
659 |
} |
||
660 |
|||
661 |
args.GetReturnValue().Set( |
||
662 |
String::NewFromUtf8(env->isolate(), |
||
663 |
189 |
*buf, |
|
664 |
NewStringType::kNormal, |
||
665 |
✓✗ | 756 |
len).ToLocalChecked()); |
666 |
} |
||
667 |
|||
668 |
11096 |
static void ToASCII(const FunctionCallbackInfo<Value>& args) { |
|
669 |
11096 |
Environment* env = Environment::GetCurrent(args); |
|
670 |
✗✓ | 11096 |
CHECK_GE(args.Length(), 1); |
671 |
✗✓ | 33288 |
CHECK(args[0]->IsString()); |
672 |
11096 |
Utf8Value val(env->isolate(), args[0]); |
|
673 |
// optional arg |
||
674 |
33288 |
bool lenient = args[1]->BooleanValue(env->isolate()); |
|
675 |
✓✓ | 11096 |
enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT; |
676 |
|||
677 |
✓✓ | 22183 |
MaybeStackBuffer<char> buf; |
678 |
11096 |
int32_t len = ToASCII(&buf, *val, val.length(), mode); |
|
679 |
|||
680 |
✓✓ | 11096 |
if (len < 0) { |
681 |
11105 |
return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII"); |
|
682 |
} |
||
683 |
|||
684 |
args.GetReturnValue().Set( |
||
685 |
String::NewFromUtf8(env->isolate(), |
||
686 |
11087 |
*buf, |
|
687 |
NewStringType::kNormal, |
||
688 |
✓✓ | 44348 |
len).ToLocalChecked()); |
689 |
} |
||
690 |
|||
691 |
// This is similar to wcwidth except that it takes the current unicode |
||
692 |
// character properties database into consideration, allowing it to |
||
693 |
// correctly calculate the column widths of things like emoji's and |
||
694 |
// newer wide characters. wcwidth, on the other hand, uses a fixed |
||
695 |
// algorithm that does not take things like emoji into proper |
||
696 |
// consideration. |
||
697 |
// |
||
698 |
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by |
||
699 |
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs |
||
700 |
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't |
||
701 |
// allow it to be input. Linux's PTY terminal prints control characters as |
||
702 |
// Narrow rhombi. |
||
703 |
// |
||
704 |
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final |
||
705 |
// consonants are 0-width when combined with initial consonants; otherwise they |
||
706 |
// are technically Wide. But many terminals (including Konsole and |
||
707 |
// VTE/GLib-based) implement all medials and finals as 0-width. |
||
708 |
// |
||
709 |
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width |
||
710 |
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420 |
||
711 |
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223 |
||
712 |
2134516 |
// Returns the number of terminal columns (0, 1 or 2) assigned to `codepoint`.
//  - 0 for control, format, enclosing-mark and nonspacing-mark characters
//    (except SOFT HYPHEN) and for emoji modifiers;
//  - 2 for East Asian Fullwidth/Wide, for Ambiguous when
//    `ambiguous_as_full_width` is set, and for Ambiguous/Neutral characters
//    with the Emoji_Presentation property;
//  - 1 for everything else.
static int GetColumnWidth(UChar32 codepoint,
                          bool ambiguous_as_full_width = false) {
  const auto zero_width_mask = U_GC_CC_MASK |  // C0/C1 control code
                               U_GC_CF_MASK |  // Format control character
                               U_GC_ME_MASK |  // Enclosing mark
                               U_GC_MN_MASK;   // Nonspacing mark

  // SOFT HYPHEN is Cf but not zero-width
  const bool is_soft_hyphen = (codepoint == 0x00AD);
  if (!is_soft_hyphen &&
      ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
       u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
    return 0;
  }

  // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
  // codepoint as being full width, wide, ambiguous, neutral, narrow,
  // or halfwidth.
  const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);

  if (eaw == U_EA_FULLWIDTH || eaw == U_EA_WIDE)
    return 2;

  // See: http://www.unicode.org/reports/tr11/#Ambiguous for details
  if (eaw == U_EA_AMBIGUOUS && ambiguous_as_full_width)
    return 2;

  // Ambiguous (not forced to full width) and Neutral characters are wide
  // only when they default to emoji presentation.
  if ((eaw == U_EA_AMBIGUOUS || eaw == U_EA_NEUTRAL) &&
      u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
    return 2;
  }

  // Halfwidth, Narrow and anything else.
  return 1;
}
||
750 |
|||
751 |
// Returns the column width for the given String. |
||
752 |
2134207 |
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) { |
|
753 |
2134207 |
Environment* env = Environment::GetCurrent(args); |
|
754 |
✗✓ | 2134207 |
if (args.Length() < 1) |
755 |
2133147 |
return; |
|
756 |
|||
757 |
4268414 |
bool ambiguous_as_full_width = args[1]->IsTrue(); |
|
758 |
4268414 |
bool expand_emoji_sequence = args[2]->IsTrue(); |
|
759 |
|||
760 |
✓✓ | 4268414 |
if (args[0]->IsNumber()) { |
761 |
uint32_t val; |
||
762 |
✗✓ | 8532588 |
if (!args[0]->Uint32Value(env->context()).To(&val)) return; |
763 |
6399441 |
args.GetReturnValue().Set(GetColumnWidth(val, ambiguous_as_full_width)); |
|
764 |
2133147 |
return; |
|
765 |
} |
||
766 |
|||
767 |
1060 |
TwoByteValue value(env->isolate(), args[0]); |
|
768 |
// reinterpret_cast is required by windows to compile |
||
769 |
1060 |
UChar* str = reinterpret_cast<UChar*>(*value); |
|
770 |
static_assert(sizeof(*str) == sizeof(**value), |
||
771 |
"sizeof(*str) == sizeof(**value)"); |
||
772 |
1060 |
UChar32 c = 0; |
|
773 |
UChar32 p; |
||
774 |
1060 |
size_t n = 0; |
|
775 |
1060 |
uint32_t width = 0; |
|
776 |
|||
777 |
✓✓ | 3492 |
while (n < value.length()) { |
778 |
1372 |
p = c; |
|
779 |
✓✓✓✓ ✓✗✓✓ |
1372 |
U16_NEXT(str, n, value.length(), c); |
780 |
// Don't count individual emoji codepoints that occur within an |
||
781 |
// emoji sequence. This is not necessarily foolproof. Some |
||
782 |
// environments display emoji sequences in the appropriate |
||
783 |
// condensed form (as a single emoji glyph), other environments |
||
784 |
// may not understand an emoji sequence and will display each |
||
785 |
// individual emoji separately. When this happens, the width |
||
786 |
// calculated will be off, and there's no reliable way of knowing |
||
787 |
// in advance if a particular sequence is going to be supported. |
||
788 |
// The expand_emoji_sequence option allows the caller to skip this |
||
789 |
// check and count each code within an emoji sequence separately. |
||
790 |
✓✗✓✓ |
4109 |
if (!expand_emoji_sequence && |
791 |
✓✓✓✓ ✗✓ |
2743 |
n > 0 && p == 0x200d && // 0x200d == ZWJ (zero width joiner) |
792 |
✗✗ | 3 |
(u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) || |
793 |
u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) { |
||
794 |
3 |
continue; |
|
795 |
} |
||
796 |
1369 |
width += GetColumnWidth(c, ambiguous_as_full_width); |
|
797 |
} |
||
798 |
2120 |
args.GetReturnValue().Set(width); |
|
799 |
} |
||
800 |
|||
801 |
5178 |
// Module initializer: installs the i18n binding functions on `target`.
// `unused` and `priv` are not used.
void Initialize(Local<Object> target,
                Local<Value> unused,
                Local<Context> context,
                void* priv) {
  Environment* env = Environment::GetCurrent(context);
  auto expose = [&](const char* name, v8::FunctionCallback callback) {
    env->SetMethod(target, name, callback);
  };

  expose("toUnicode", ToUnicode);
  expose("toASCII", ToASCII);
  expose("getStringWidth", GetStringWidth);

  // One-shot converters
  expose("icuErrName", ICUErrorName);
  expose("transcode", Transcode);

  // ConverterObject
  expose("getConverter", ConverterObject::Create);
  expose("decode", ConverterObject::Decode);
  expose("hasConverter", ConverterObject::Has);
}
|
819 |
|||
820 |
} // namespace i18n |
||
821 |
} // namespace node |
||
822 |
|||
823 |
5033 |
NODE_MODULE_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize) |
|
824 |
|||
825 |
#endif // NODE_HAVE_I18N_SUPPORT |
Generated by: GCOVR (Version 3.4) |