GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/node_i18n.cc Lines: 355 396 89.6 %
Date: 2020-09-03 22:13:26 Branches: 145 226 64.2 %

Line Branch Exec Source
1
// Copyright Joyent, Inc. and other Node contributors.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a
4
// copy of this software and associated documentation files (the
5
// "Software"), to deal in the Software without restriction, including
6
// without limitation the rights to use, copy, modify, merge, publish,
7
// distribute, sublicense, and/or sell copies of the Software, and to permit
8
// persons to whom the Software is furnished to do so, subject to the
9
// following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included
12
// in all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22
/*
23
 * notes: by srl295
24
 *  - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data
25
 *     ( stubdata/libicudata.a ) containing nothing, no data, and it's also
26
 *    linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT
27
 *    macro names. That's the "english+root" data.
28
 *
29
 *    If icu_data_path is non-null, the user has provided a path and we assume
30
 *    it goes somewhere useful. We set that path in ICU, and exit.
31
 *    If icu_data_path is null, they haven't set a path and we want the
32
 *    "english+root" data.  We call
33
 *       udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...)
34
 *    to load up the english+root data.
35
 *
36
 *  - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full
37
 *    data. All of the variables and command line options for changing data at
38
 *    runtime are disabled, as they wouldn't fully override the internal data.
39
 *    See:  http://bugs.icu-project.org/trac/ticket/10924
40
 */
41
42
43
#include "node_i18n.h"
44
#include "node_external_reference.h"
45
46
#if defined(NODE_HAVE_I18N_SUPPORT)
47
48
#include "base_object-inl.h"
49
#include "node.h"
50
#include "node_buffer.h"
51
#include "node_errors.h"
52
#include "node_internals.h"
53
#include "util-inl.h"
54
#include "v8.h"
55
56
#include <unicode/utypes.h>
57
#include <unicode/putil.h>
58
#include <unicode/uchar.h>
59
#include <unicode/uclean.h>
60
#include <unicode/udata.h>
61
#include <unicode/uidna.h>
62
#include <unicode/ucnv.h>
63
#include <unicode/utf8.h>
64
#include <unicode/utf16.h>
65
#include <unicode/timezone.h>
66
#include <unicode/ulocdata.h>
67
#include <unicode/uvernum.h>
68
#include <unicode/uversion.h>
69
#include <unicode/ustring.h>
70
71
#ifdef NODE_HAVE_SMALL_ICU
72
/* if this is defined, we have a 'secondary' entry point.
73
   compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
74
#define SMALL_ICUDATA_ENTRY_POINT \
75
  SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME)
76
#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff)
77
#ifndef U_LIB_SUFFIX_C_NAME
78
#define SMALL_DEF(major, suff) icusmdt##major##_dat
79
#else
80
#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat
81
#endif
82
83
extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
84
#endif
85
86
namespace node {
87
88
using v8::Context;
89
using v8::FunctionCallbackInfo;
90
using v8::FunctionTemplate;
91
using v8::Int32;
92
using v8::Isolate;
93
using v8::Local;
94
using v8::MaybeLocal;
95
using v8::NewStringType;
96
using v8::Object;
97
using v8::ObjectTemplate;
98
using v8::String;
99
using v8::Uint8Array;
100
using v8::Value;
101
102
namespace i18n {
103
namespace {
104
105
template <typename T>
106
7622
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) {
107
7622
  MaybeLocal<Object> ret = Buffer::New(env, buf);
108

7622
  if (ret.IsEmpty())
109
    return ret;
110
111
  static_assert(sizeof(T) == 1 || sizeof(T) == 2,
112
                "Currently only one- or two-byte buffers are supported");
113

7622
  if (sizeof(T) > 1 && IsBigEndian()) {
114
    SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
115
    SwapBytes16(retbuf_data, retbuf_length);
116
  }
117
118
7622
  return ret;
119
}
120
121
// One-Shot Converters
122
123
2
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
124
                      const char* data,
125
                      const size_t length,
126
                      const size_t length_in_chars) {
127
2
  dest->AllocateSufficientStorage(length_in_chars);
128
2
  char* dst = reinterpret_cast<char*>(**dest);
129
2
  memcpy(dst, data, length);
130
2
  if (IsBigEndian()) {
131
    SwapBytes16(dst, length);
132
  }
133
2
}
134
135
// Signature shared by all one-shot transcoding helpers below. `status`
// receives the ICU error code; the returned MaybeLocal is empty on failure.
using TranscodeFunc = MaybeLocal<Object> (*)(Environment* env,
                                             const char* fromEncoding,
                                             const char* toEncoding,
                                             const char* source,
                                             const size_t source_length,
                                             UErrorCode* status);
141
142
2
// Generic transcoder: converts `source` from `fromEncoding` to `toEncoding`
// with ucnv_convertEx(). The target converter uses "?" as its substitution
// character. Returns an empty MaybeLocal (with `*status` set) on failure.
MaybeLocal<Object> Transcode(Environment* env,
                             const char* fromEncoding,
                             const char* toEncoding,
                             const char* source,
                             const size_t source_length,
                             UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<char> result;
  Converter to(toEncoding, "?");
  Converter from(fromEncoding);

  // Output size estimate: the target converter's maximum character size
  // for every input byte.
  const uint32_t limit = source_length * to.max_char_size();
  result.AllocateSufficientStorage(limit);

  char* out = *result;
  char* const out_end = out + limit;
  const char* const in_end = source + source_length;
  ucnv_convertEx(to.conv(), from.conv(), &out, out_end,
                 &source, in_end, nullptr, nullptr,
                 nullptr, nullptr, true, true, status);

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  // `out` was advanced past the last byte written.
  result.SetLength(out - &result[0]);
  return ToBufferEndian(env, &result);
}
165
166
4
// Converts `source` (in `fromEncoding`) to UCS-2 using ucnv_toUChars().
// `toEncoding` is unused; the parameter exists to match TranscodeFunc.
// Returns an empty MaybeLocal (with `*status` set) on failure.
//
// The destination capacity handed to ICU is expressed in UChar units and now
// matches the `source_length` the destination buffer was constructed from.
// Previously `source_length * sizeof(UChar)` was passed, which advertised
// twice that amount.
// NOTE(review): assumes MaybeStackBuffer<UChar>(n) provides exactly n
// elements - confirm against util.h.
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
                                   const char* fromEncoding,
                                   const char* toEncoding,
                                   const char* source,
                                   const size_t source_length,
                                   UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  MaybeStackBuffer<UChar> destbuf(source_length);
  Converter from(fromEncoding);

  ucnv_toUChars(from.conv(), *destbuf, source_length,
                source, source_length, status);

  // With an exactly-sized buffer ICU reports that no terminator was written;
  // the output is length-delimited, so that is not an error for the caller.
  if (*status == U_STRING_NOT_TERMINATED_WARNING)
    *status = U_ZERO_ERROR;

  if (U_SUCCESS(*status))
    ret = ToBufferEndian(env, &destbuf);
  return ret;
}
183
184
// Converts UCS-2 `source` to `toEncoding` using ucnv_fromUChars(), with "?"
// as the substitution character. `fromEncoding` is unused; the parameter
// exists to match TranscodeFunc. Returns an empty MaybeLocal on failure.
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
                                     const char* fromEncoding,
                                     const char* toEncoding,
                                     const char* source,
                                     const size_t source_length,
                                     UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> sourcebuf;
  Converter to(toEncoding, "?");

  // Number of whole UChar units contained in the input bytes.
  const size_t length_in_chars = source_length / sizeof(UChar);
  CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);

  MaybeStackBuffer<char> destbuf(length_in_chars);
  const uint32_t len = ucnv_fromUChars(to.conv(), *destbuf, length_in_chars,
                                       *sourcebuf, length_in_chars, status);
  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(len);
  return ToBufferEndian(env, &destbuf);
}
205
206
2
// Converts UTF-8 `source` to UCS-2 with u_strFromUTF8(). The first attempt
// uses the buffer's current capacity; if ICU reports a buffer overflow the
// buffer is grown to the length ICU asked for and the conversion is retried
// once. The encoding-name parameters are unused (TranscodeFunc signature).
// Returns an empty MaybeLocal (with `*status` set) on failure.
MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> destbuf;
  int32_t result_length;

  u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length,
                source, source_length, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // `result_length` now holds the required size; retry with enough room.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(result_length);
    u_strFromUTF8(*destbuf, result_length, &result_length,
                  source, source_length, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(result_length);
  return ToBufferEndian(env, &destbuf);
}
233
234
2
// Converts UCS-2 `source` to UTF-8 with u_strToUTF8(). The first attempt uses
// the destination buffer's current capacity; on a buffer overflow the buffer
// is grown to the length ICU reported and the conversion is retried once.
// The encoding-name parameters are unused (TranscodeFunc signature).
// Returns an empty MaybeLocal (with `*status` set) on failure.
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;

  // Number of whole UChar units contained in the input bytes.
  const size_t length_in_chars = source_length / sizeof(UChar);
  int32_t result_length;
  MaybeStackBuffer<UChar> sourcebuf;
  MaybeStackBuffer<char> destbuf;
  CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars);

  u_strToUTF8(*destbuf, destbuf.capacity(), &result_length,
              *sourcebuf, length_in_chars, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // `result_length` now holds the required size; retry with enough room.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(result_length);
    u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf,
                length_in_chars, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(result_length);
  return ToBufferEndian(env, &destbuf);
}
264
265
20
const char* EncodingName(const enum encoding encoding) {
266

20
  switch (encoding) {
267
2
    case ASCII: return "us-ascii";
268
4
    case LATIN1: return "iso8859-1";
269
8
    case UCS2: return "utf16le";
270
6
    case UTF8: return "utf-8";
271
    default: return nullptr;
272
  }
273
}
274
275
22
bool SupportedEncoding(const enum encoding encoding) {
276
22
  switch (encoding) {
277
    case ASCII:
278
    case LATIN1:
279
    case UCS2:
280
20
    case UTF8: return true;
281
2
    default: return false;
282
  }
283
}
284
285
12
void Transcode(const FunctionCallbackInfo<Value>&args) {
286
12
  Environment* env = Environment::GetCurrent(args);
287
12
  Isolate* isolate = env->isolate();
288
12
  UErrorCode status = U_ZERO_ERROR;
289
  MaybeLocal<Object> result;
290
291
12
  ArrayBufferViewContents<char> input(args[0]);
292
12
  const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER);
293
12
  const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER);
294
295

12
  if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
296
10
    TranscodeFunc tfn = &Transcode;
297

10
    switch (fromEncoding) {
298
      case ASCII:
299
      case LATIN1:
300
4
        if (toEncoding == UCS2)
301
4
          tfn = &TranscodeToUcs2;
302
4
        break;
303
      case UTF8:
304
4
        if (toEncoding == UCS2)
305
2
          tfn = &TranscodeUcs2FromUtf8;
306
4
        break;
307
      case UCS2:
308
2
        switch (toEncoding) {
309
          case UCS2:
310
            tfn = &Transcode;
311
            break;
312
          case UTF8:
313
2
            tfn = &TranscodeUtf8FromUcs2;
314
2
            break;
315
          default:
316
            tfn = &TranscodeFromUcs2;
317
        }
318
2
        break;
319
      default:
320
        // This should not happen because of the SupportedEncoding checks
321
        ABORT();
322
    }
323
324
    result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding),
325
10
                 input.data(), input.length(), &status);
326
  } else {
327
2
    status = U_ILLEGAL_ARGUMENT_ERROR;
328
  }
329
330
12
  if (result.IsEmpty())
331
6
    return args.GetReturnValue().Set(status);
332
333
20
  return args.GetReturnValue().Set(result.ToLocalChecked());
334
}
335
336
2
void ICUErrorName(const FunctionCallbackInfo<Value>& args) {
337
2
  Environment* env = Environment::GetCurrent(args);
338
4
  CHECK(args[0]->IsInt32());
339
6
  UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value());
340
4
  args.GetReturnValue().Set(
341
4
      String::NewFromUtf8(env->isolate(),
342
2
                          u_errorName(status)).ToLocalChecked());
343
2
}
344
345
}  // anonymous namespace
346
347
8
// Opens the ICU converter called `name` and takes ownership of it. A failure
// to open the converter trips the CHECK. `sub`, when non-null, becomes the
// converter's substitution characters.
Converter::Converter(const char* name, const char* sub) {
  UErrorCode open_status = U_ZERO_ERROR;
  UConverter* opened = ucnv_open(name, &open_status);
  CHECK(U_SUCCESS(open_status));

  conv_.reset(opened);
  set_subst_chars(sub);
}
354
355
10887
// Adopts an already-opened ICU converter and applies the substitution
// characters in `sub` (if any).
Converter::Converter(UConverter* converter, const char* sub)
    : conv_(converter) {
  set_subst_chars(sub);
}
359
360
10895
void Converter::set_subst_chars(const char* sub) {
361
10895
  CHECK(conv_);
362
10894
  UErrorCode status = U_ZERO_ERROR;
363
10894
  if (sub != nullptr) {
364
2
    ucnv_setSubstChars(conv_.get(), sub, strlen(sub), &status);
365
2
    CHECK(U_SUCCESS(status));
366
  }
367
10894
}
368
369
7428
void Converter::reset() {
370
7428
  ucnv_reset(conv_.get());
371
7428
}
372
373
7846
size_t Converter::min_char_size() const {
374
7846
  CHECK(conv_);
375
7846
  return ucnv_getMinCharSize(conv_.get());
376
}
377
378
2
size_t Converter::max_char_size() const {
379
2
  CHECK(conv_);
380
2
  return ucnv_getMaxCharSize(conv_.get());
381
}
382
383
2
void ConverterObject::Has(const FunctionCallbackInfo<Value>& args) {
384
2
  Environment* env = Environment::GetCurrent(args);
385
386
2
  CHECK_GE(args.Length(), 1);
387
4
  Utf8Value label(env->isolate(), args[0]);
388
389
2
  UErrorCode status = U_ZERO_ERROR;
390
4
  ConverterPointer conv(ucnv_open(*label, &status));
391
6
  args.GetReturnValue().Set(!!U_SUCCESS(status));
392
2
}
393
394
10885
void ConverterObject::Create(const FunctionCallbackInfo<Value>& args) {
395
10885
  Environment* env = Environment::GetCurrent(args);
396
397
10885
  Local<ObjectTemplate> t = env->i18n_converter_template();
398
  Local<Object> obj;
399
32659
  if (!t->NewInstance(env->context()).ToLocal(&obj)) return;
400
401
10886
  CHECK_GE(args.Length(), 2);
402
21773
  Utf8Value label(env->isolate(), args[0]);
403
43548
  int flags = args[1]->Uint32Value(env->context()).ToChecked();
404
  bool fatal =
405
10887
      (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
406
407
10887
  UErrorCode status = U_ZERO_ERROR;
408
10887
  UConverter* conv = ucnv_open(*label, &status);
409
10887
  if (U_FAILURE(status))
410
    return;
411
412
10887
  if (fatal) {
413
7006
    status = U_ZERO_ERROR;
414
    ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
415
7006
                        nullptr, nullptr, nullptr, &status);
416
  }
417
418
10887
  new ConverterObject(env, obj, conv, flags);
419
21774
  args.GetReturnValue().Set(obj);
420
}
421
422
7846
void ConverterObject::Decode(const FunctionCallbackInfo<Value>& args) {
423
7846
  Environment* env = Environment::GetCurrent(args);
424
425
7846
  CHECK_GE(args.Length(), 3);  // Converter, Buffer, Flags
426
427
  ConverterObject* converter;
428
23304
  ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
429
7846
  ArrayBufferViewContents<char> input(args[1]);
430
31384
  int flags = args[2]->Uint32Value(env->context()).ToChecked();
431
432
7846
  UErrorCode status = U_ZERO_ERROR;
433
8080
  MaybeStackBuffer<UChar> result;
434
  MaybeLocal<Object> ret;
435
7846
  size_t limit = converter->min_char_size() * input.length();
436
7846
  if (limit > 0)
437
7816
    result.AllocateSufficientStorage(limit);
438
439
7846
  UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
440
7846
  auto cleanup = OnScopeLeave([&]() {
441
7846
    if (flush) {
442
      // Reset the converter state.
443
7428
      converter->set_bom_seen(false);
444
7428
      converter->reset();
445
    }
446
15926
  });
447
448
7846
  const char* source = input.data();
449
7846
  size_t source_length = input.length();
450
451
7846
  UChar* target = *result;
452
15692
  ucnv_toUnicode(converter->conv(),
453
                 &target,
454
7846
                 target + (limit * sizeof(UChar)),
455
                 &source,
456
                 source + source_length,
457
                 nullptr,
458
                 flush,
459
7846
                 &status);
460
461
7846
  if (U_SUCCESS(status)) {
462
7612
    bool omit_initial_bom = false;
463
7612
    if (limit > 0) {
464
7582
      result.SetLength(target - &result[0]);
465

22638
      if (result.length() > 0 &&
466
8144
          converter->unicode() &&
467

8908
          !converter->ignore_bom() &&
468
656
          !converter->bom_seen()) {
469
        // If the very first result in the stream is a BOM, and we are not
470
        // explicitly told to ignore it, then we mark it for discarding.
471
376
        if (result[0] == 0xFEFF)
472
23
          omit_initial_bom = true;
473
376
        converter->set_bom_seen(true);
474
      }
475
    }
476
7612
    ret = ToBufferEndian(env, &result);
477

7635
    if (omit_initial_bom && !ret.IsEmpty()) {
478
      // Peform `ret = ret.slice(2)`.
479
46
      CHECK(ret.ToLocalChecked()->IsUint8Array());
480
46
      Local<Uint8Array> orig_ret = ret.ToLocalChecked().As<Uint8Array>();
481
69
      ret = Buffer::New(env,
482
23
                        orig_ret->Buffer(),
483
23
                        orig_ret->ByteOffset() + 2,
484
69
                        orig_ret->ByteLength() - 2)
485
46
                            .FromMaybe(Local<Uint8Array>());
486
    }
487
7612
    if (!ret.IsEmpty())
488
15224
      args.GetReturnValue().Set(ret.ToLocalChecked());
489
7612
    return;
490
  }
491
492
702
  args.GetReturnValue().Set(status);
493
}
494
495
10887
// Wraps `converter` in a weak BaseObject attached to `wrap`. The
// CONVERTER_FLAGS_UNICODE bit is added to the stored flags when the ICU
// converter type is UTF-8 or UTF-16 (either byte order).
ConverterObject::ConverterObject(
    Environment* env,
    Local<Object> wrap,
    UConverter* converter,
    int flags,
    const char* sub)
    : BaseObject(env, wrap),
      Converter(converter, sub),
      flags_(flags) {
  MakeWeak();

  const auto type = ucnv_getType(converter);
  const bool is_unicode = type == UCNV_UTF8 ||
                          type == UCNV_UTF16_BigEndian ||
                          type == UCNV_UTF16_LittleEndian;
  if (is_unicode)
    flags_ |= CONVERTER_FLAGS_UNICODE;
}
517
518
519
4426
bool InitializeICUDirectory(const std::string& path) {
520
4426
  UErrorCode status = U_ZERO_ERROR;
521
4426
  if (path.empty()) {
522
#ifdef NODE_HAVE_SMALL_ICU
523
    // install the 'small' data.
524
    udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status);
525
#else  // !NODE_HAVE_SMALL_ICU
526
    // no small data, so nothing to do.
527
#endif  // !NODE_HAVE_SMALL_ICU
528
  } else {
529
    u_setDataDirectory(path.c_str());
530
    u_init(&status);
531
  }
532
4426
  return status == U_ZERO_ERROR;
533
}
534
535
382
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
536
                  const char* input,
537
                  size_t length) {
538
382
  UErrorCode status = U_ZERO_ERROR;
539
382
  uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
540
382
  UIDNA* uidna = uidna_openUTS46(options, &status);
541
382
  if (U_FAILURE(status))
542
    return -1;
543
382
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
544
545
382
  int32_t len = uidna_nameToUnicodeUTF8(uidna,
546
                                        input, length,
547
382
                                        **buf, buf->capacity(),
548
                                        &info,
549
382
                                        &status);
550
551
  // Do not check info.errors like we do with ToASCII since ToUnicode always
552
  // returns a string, despite any possible errors that may have occurred.
553
554
382
  if (status == U_BUFFER_OVERFLOW_ERROR) {
555
    status = U_ZERO_ERROR;
556
    buf->AllocateSufficientStorage(len);
557
    len = uidna_nameToUnicodeUTF8(uidna,
558
                                  input, length,
559
                                  **buf, buf->capacity(),
560
                                  &info,
561
                                  &status);
562
  }
563
564
  // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
565
  // string, regardless of whether an error occurred.
566
567
382
  if (U_FAILURE(status)) {
568
    len = -1;
569
    buf->SetLength(0);
570
  } else {
571
382
    buf->SetLength(len);
572
  }
573
574
382
  uidna_close(uidna);
575
382
  return len;
576
}
577
578
12421
int32_t ToASCII(MaybeStackBuffer<char>* buf,
579
                const char* input,
580
                size_t length,
581
                enum idna_mode mode) {
582
12421
  UErrorCode status = U_ZERO_ERROR;
583
  uint32_t options =                  // CheckHyphens = false; handled later
584
    UIDNA_CHECK_BIDI |                // CheckBidi = true
585
    UIDNA_CHECK_CONTEXTJ |            // CheckJoiners = true
586
12421
    UIDNA_NONTRANSITIONAL_TO_ASCII;   // Nontransitional_Processing
587
12421
  if (mode == IDNA_STRICT) {
588
    options |= UIDNA_USE_STD3_RULES;  // UseSTD3ASCIIRules = beStrict
589
                                      // VerifyDnsLength = beStrict;
590
                                      //   handled later
591
  }
592
593
12421
  UIDNA* uidna = uidna_openUTS46(options, &status);
594
12421
  if (U_FAILURE(status))
595
    return -1;
596
12421
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
597
598
12421
  int32_t len = uidna_nameToASCII_UTF8(uidna,
599
                                       input, length,
600
12421
                                       **buf, buf->capacity(),
601
                                       &info,
602
12421
                                       &status);
603
604
12421
  if (status == U_BUFFER_OVERFLOW_ERROR) {
605
2
    status = U_ZERO_ERROR;
606
2
    buf->AllocateSufficientStorage(len);
607
2
    len = uidna_nameToASCII_UTF8(uidna,
608
                                 input, length,
609
2
                                 **buf, buf->capacity(),
610
                                 &info,
611
2
                                 &status);
612
  }
613
614
  // In UTS #46 which specifies ToASCII, certain error conditions are
615
  // configurable through options, and the WHATWG URL Standard promptly elects
616
  // to disable some of them to accommodate for real-world use cases.
617
  // Unfortunately, ICU4C's IDNA module does not support disabling some of
618
  // these options through `options` above, and thus continues throwing
619
  // unnecessary errors. To counter this situation, we just filter out the
620
  // errors that may have happened afterwards, before deciding whether to
621
  // return an error from this function.
622
623
  // CheckHyphens = false
624
  // (Specified in the current UTS #46 draft rev. 18.)
625
  // Refs:
626
  // - https://github.com/whatwg/url/issues/53
627
  // - https://github.com/whatwg/url/pull/309
628
  // - http://www.unicode.org/review/pri317/
629
  // - http://www.unicode.org/reports/tr46/tr46-18.html
630
  // - https://www.icann.org/news/announcement-2000-01-07-en
631
12421
  info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
632
12421
  info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
633
12421
  info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
634
635
12421
  if (mode != IDNA_STRICT) {
636
    // VerifyDnsLength = beStrict
637
12421
    info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
638
12421
    info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
639
12421
    info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
640
  }
641
642


12421
  if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
643
78
    len = -1;
644
78
    buf->SetLength(0);
645
  } else {
646
12343
    buf->SetLength(len);
647
  }
648
649
12421
  uidna_close(uidna);
650
12421
  return len;
651
}
652
653
189
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
654
189
  Environment* env = Environment::GetCurrent(args);
655
189
  CHECK_GE(args.Length(), 1);
656
567
  CHECK(args[0]->IsString());
657
378
  Utf8Value val(env->isolate(), args[0]);
658
659
378
  MaybeStackBuffer<char> buf;
660
189
  int32_t len = ToUnicode(&buf, *val, val.length());
661
662
189
  if (len < 0) {
663
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode");
664
  }
665
666
378
  args.GetReturnValue().Set(
667
378
      String::NewFromUtf8(env->isolate(),
668
189
                          *buf,
669
                          NewStringType::kNormal,
670
189
                          len).ToLocalChecked());
671
}
672
673
10198
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
674
10198
  Environment* env = Environment::GetCurrent(args);
675
10198
  CHECK_GE(args.Length(), 1);
676
30594
  CHECK(args[0]->IsString());
677
20387
  Utf8Value val(env->isolate(), args[0]);
678
  // optional arg
679
30594
  bool lenient = args[1]->BooleanValue(env->isolate());
680
10198
  enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
681
682
20387
  MaybeStackBuffer<char> buf;
683
10198
  int32_t len = ToASCII(&buf, *val, val.length(), mode);
684
685
10198
  if (len < 0) {
686
9
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII");
687
  }
688
689
20378
  args.GetReturnValue().Set(
690
20378
      String::NewFromUtf8(env->isolate(),
691
10189
                          *buf,
692
                          NewStringType::kNormal,
693
10189
                          len).ToLocalChecked());
694
}
695
696
// This is similar to wcwidth except that it takes the current unicode
697
// character properties database into consideration, allowing it to
698
// correctly calculate the column widths of things like emoji's and
699
// newer wide characters. wcwidth, on the other hand, uses a fixed
700
// algorithm that does not take things like emoji into proper
701
// consideration.
702
//
703
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by
704
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs
705
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't
706
// allow it to be input. Linux's PTY terminal prints control characters as
707
// Narrow rhombi.
708
//
709
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final
710
// consonants are 0-width when combined with initial consonants; otherwise they
711
// are technically Wide. But many terminals (including Konsole and
712
// VTE/GLib-based) implement all medials and finals as 0-width.
713
//
714
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width
715
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420
716
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223
717
19100
static int GetColumnWidth(UChar32 codepoint,
718
                          bool ambiguous_as_full_width = false) {
719
  // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
720
  // codepoint as being full width, wide, ambiguous, neutral, narrow,
721
  // or halfwidth.
722
19100
  const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);
723

19100
  switch (eaw) {
724
    case U_EA_FULLWIDTH:
725
    case U_EA_WIDE:
726
1932
      return 2;
727
    case U_EA_AMBIGUOUS:
728
      // See: http://www.unicode.org/reports/tr11/#Ambiguous for details
729
3318
      if (ambiguous_as_full_width) {
730
        return 2;
731
      }
732
      // If ambiguous_as_full_width is false:
733
      // Fall through
734
    case U_EA_NEUTRAL:
735
5357
      if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
736
        return 2;
737
      }
738
      // Fall through
739
    case U_EA_HALFWIDTH:
740
    case U_EA_NARROW:
741
    default:
742
      const auto zero_width_mask = U_GC_CC_MASK |  // C0/C1 control code
743
                                  U_GC_CF_MASK |  // Format control character
744
                                  U_GC_ME_MASK |  // Enclosing mark
745
17168
                                  U_GC_MN_MASK;   // Nonspacing mark
746

34514
      if (codepoint != 0x00AD &&  // SOFT HYPHEN is Cf but not zero-width
747
34074
          ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
748
34074
          u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
749
206
        return 0;
750
      }
751
16962
      return 1;
752
  }
753
}
754
755
// Returns the column width for the given String.
756
1253
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
757
1253
  Environment* env = Environment::GetCurrent(args);
758
3759
  CHECK(args[0]->IsString());
759
760
2506
  bool ambiguous_as_full_width = args[1]->IsTrue();
761

2506
  bool expand_emoji_sequence = !args[2]->IsBoolean() || args[2]->IsTrue();
762
763
2506
  TwoByteValue value(env->isolate(), args[0]);
764
  // reinterpret_cast is required by windows to compile
765
1253
  UChar* str = reinterpret_cast<UChar*>(*value);
766
  static_assert(sizeof(*str) == sizeof(**value),
767
                "sizeof(*str) == sizeof(**value)");
768
1253
  UChar32 c = 0;
769
  UChar32 p;
770
1253
  size_t n = 0;
771
1253
  uint32_t width = 0;
772
773
39453
  while (n < value.length()) {
774
19100
    p = c;
775


19100
    U16_NEXT(str, n, value.length(), c);
776
    // Don't count individual emoji codepoints that occur within an
777
    // emoji sequence. This is not necessarily foolproof. Some
778
    // environments display emoji sequences in the appropriate
779
    // condensed form (as a single emoji glyph), other environments
780
    // may not understand an emoji sequence and will display each
781
    // individual emoji separately. When this happens, the width
782
    // calculated will be off, and there's no reliable way of knowing
783
    // in advance if a particular sequence is going to be supported.
784
    // The expand_emoji_sequence option allows the caller to skip this
785
    // check and count each code within an emoji sequence separately.
786
    // https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
787

38200
    if (!expand_emoji_sequence &&
788

19100
        n > 0 && p == 0x200d &&  // 0x200d == ZWJ (zero width joiner)
789
        (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||
790
         u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) {
791
      continue;
792
    }
793
19100
    width += GetColumnWidth(c, ambiguous_as_full_width);
794
  }
795
2506
  args.GetReturnValue().Set(width);
796
1253
}
797
798
451
void Initialize(Local<Object> target,
799
                Local<Value> unused,
800
                Local<Context> context,
801
                void* priv) {
802
451
  Environment* env = Environment::GetCurrent(context);
803
452
  env->SetMethod(target, "toUnicode", ToUnicode);
804
451
  env->SetMethod(target, "toASCII", ToASCII);
805
451
  env->SetMethod(target, "getStringWidth", GetStringWidth);
806
807
  // One-shot converters
808
451
  env->SetMethod(target, "icuErrName", ICUErrorName);
809
451
  env->SetMethod(target, "transcode", Transcode);
810
811
  // ConverterObject
812
  {
813
452
    Local<FunctionTemplate> t = FunctionTemplate::New(env->isolate());
814
904
    t->Inherit(BaseObject::GetConstructorTemplate(env));
815
1356
    t->InstanceTemplate()->SetInternalFieldCount(
816
452
        ConverterObject::kInternalFieldCount);
817
    Local<String> converter_string =
818
452
        FIXED_ONE_BYTE_STRING(env->isolate(), "Converter");
819
452
    t->SetClassName(converter_string);
820
452
    env->set_i18n_converter_template(t->InstanceTemplate());
821
  }
822
823
452
  env->SetMethod(target, "getConverter", ConverterObject::Create);
824
452
  env->SetMethod(target, "decode", ConverterObject::Decode);
825
452
  env->SetMethod(target, "hasConverter", ConverterObject::Has);
826
452
}
827
828
4402
// Registers every native callback that Initialize() exposes with the
// external reference registry.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  // IDNA and string-width helpers
  registry->Register(ToUnicode);
  registry->Register(ToASCII);
  registry->Register(GetStringWidth);

  // One-shot converters
  registry->Register(ICUErrorName);
  registry->Register(Transcode);

  // ConverterObject
  registry->Register(ConverterObject::Create);
  registry->Register(ConverterObject::Decode);
  registry->Register(ConverterObject::Has);
}
838
839
}  // namespace i18n
840
}  // namespace node
841
842
4471
// Register the `icu` internal binding and its external references.
NODE_MODULE_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize)
NODE_MODULE_EXTERNAL_REFERENCE(icu, node::i18n::RegisterExternalReferences)
844
845
#endif  // NODE_HAVE_I18N_SUPPORT