GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/node_i18n.cc Lines: 343 384 89.3 %
Date: 2020-05-27 22:15:15 Branches: 143 222 64.4 %

Line Branch Exec Source
1
// Copyright Joyent, Inc. and other Node contributors.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a
4
// copy of this software and associated documentation files (the
5
// "Software"), to deal in the Software without restriction, including
6
// without limitation the rights to use, copy, modify, merge, publish,
7
// distribute, sublicense, and/or sell copies of the Software, and to permit
8
// persons to whom the Software is furnished to do so, subject to the
9
// following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included
12
// in all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22
/*
23
 * notes: by srl295
24
 *  - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data
25
 *     ( stubdata/libicudata.a ) containing nothing, no data, and it's also
26
 *    linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT
27
 *    macro names. That's the "english+root" data.
28
 *
29
 *    If icu_data_path is non-null, the user has provided a path and we assume
30
 *    it goes somewhere useful. We set that path in ICU, and exit.
31
 *    If icu_data_path is null, they haven't set a path and we want the
32
 *    "english+root" data.  We call
33
 *       udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...)
34
 *    to load up the english+root data.
35
 *
36
 *  - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full
37
 *    data. All of the variables and command line options for changing data at
38
 *    runtime are disabled, as they wouldn't fully override the internal data.
39
 *    See:  http://bugs.icu-project.org/trac/ticket/10924
40
 */
41
42
43
#include "node_i18n.h"
44
45
#if defined(NODE_HAVE_I18N_SUPPORT)
46
47
#include "base_object-inl.h"
48
#include "node.h"
49
#include "node_buffer.h"
50
#include "node_errors.h"
51
#include "node_internals.h"
52
#include "util-inl.h"
53
#include "v8.h"
54
55
#include <unicode/utypes.h>
56
#include <unicode/putil.h>
57
#include <unicode/uchar.h>
58
#include <unicode/uclean.h>
59
#include <unicode/udata.h>
60
#include <unicode/uidna.h>
61
#include <unicode/ucnv.h>
62
#include <unicode/utf8.h>
63
#include <unicode/utf16.h>
64
#include <unicode/timezone.h>
65
#include <unicode/ulocdata.h>
66
#include <unicode/uvernum.h>
67
#include <unicode/uversion.h>
68
#include <unicode/ustring.h>
69
70
#ifdef NODE_HAVE_SMALL_ICU
71
/* if this is defined, we have a 'secondary' entry point.
72
   compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
73
#define SMALL_ICUDATA_ENTRY_POINT \
74
  SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME)
75
#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff)
76
#ifndef U_LIB_SUFFIX_C_NAME
77
#define SMALL_DEF(major, suff) icusmdt##major##_dat
78
#else
79
#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat
80
#endif
81
82
extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
83
#endif
84
85
namespace node {
86
87
using v8::Context;
88
using v8::FunctionCallbackInfo;
89
using v8::FunctionTemplate;
90
using v8::Int32;
91
using v8::Isolate;
92
using v8::Local;
93
using v8::MaybeLocal;
94
using v8::NewStringType;
95
using v8::Object;
96
using v8::ObjectTemplate;
97
using v8::String;
98
using v8::Uint8Array;
99
using v8::Value;
100
101
namespace i18n {
102
namespace {
103
104
template <typename T>
105
7598
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) {
106
7598
  MaybeLocal<Object> ret = Buffer::New(env, buf);
107

7598
  if (ret.IsEmpty())
108
    return ret;
109
110
  static_assert(sizeof(T) == 1 || sizeof(T) == 2,
111
                "Currently only one- or two-byte buffers are supported");
112

7598
  if (sizeof(T) > 1 && IsBigEndian()) {
113
    SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
114
    SwapBytes16(retbuf_data, retbuf_length);
115
  }
116
117
7598
  return ret;
118
}
119
120
// One-Shot Converters
121
122
2
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
123
                      const char* data,
124
                      const size_t length,
125
                      const size_t length_in_chars) {
126
2
  dest->AllocateSufficientStorage(length_in_chars);
127
2
  char* dst = reinterpret_cast<char*>(**dest);
128
2
  memcpy(dst, data, length);
129
2
  if (IsBigEndian()) {
130
    SwapBytes16(dst, length);
131
  }
132
2
}
133
134
// Common signature of the one-shot transcoding helpers below, so that the
// JS-facing Transcode() binding can select one through a function pointer.
using TranscodeFunc = MaybeLocal<Object> (*)(Environment* env,
                                             const char* fromEncoding,
                                             const char* toEncoding,
                                             const char* source,
                                             const size_t source_length,
                                             UErrorCode* status);
140
141
2
// Generic transcoder: converts `source` from `fromEncoding` to `toEncoding`
// by pivoting through ICU converters (ucnv_convertEx). "?" is installed as
// the substitution sequence on the destination converter.
//
// `*status` is reset on entry and receives the ICU result code. Returns the
// converted bytes as a Buffer, or an empty MaybeLocal when ICU reports a
// failure or the Buffer could not be created.
MaybeLocal<Object> Transcode(Environment* env,
                             const char* fromEncoding,
                             const char* toEncoding,
                             const char* source,
                             const size_t source_length,
                             UErrorCode* status) {
  *status = U_ZERO_ERROR;

  Converter to(toEncoding, "?");
  Converter from(fromEncoding);

  // Upper bound for the output: every source byte may expand to the largest
  // character of the destination encoding.
  const uint32_t capacity = source_length * to.max_char_size();
  MaybeStackBuffer<char> output;
  output.AllocateSufficientStorage(capacity);

  char* write_pos = *output;
  char* const write_end = write_pos + capacity;
  const char* const read_end = source + source_length;
  ucnv_convertEx(to.conv(), from.conv(),
                 &write_pos, write_end,
                 &source, read_end,
                 nullptr, nullptr, nullptr, nullptr,
                 true, true, status);

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  output.SetLength(write_pos - &output[0]);
  return ToBufferEndian(env, &output);
}
164
165
4
// Converts `source` (in `fromEncoding`) to two-byte UChar data using
// ucnv_toUChars(). The Transcode() binding selects this helper for ASCII and
// Latin-1 input. `toEncoding` is not used.
//
// `*status` is reset on entry and receives the ICU result code. Returns the
// converted data as a Buffer, or an empty MaybeLocal on failure.
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
                                   const char* fromEncoding,
                                   const char* toEncoding,
                                   const char* source,
                                   const size_t source_length,
                                   UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  MaybeStackBuffer<UChar> destbuf(source_length);
  Converter from(fromEncoding);

  // The capacity handed to ICU is counted in UChars and must not exceed what
  // the buffer really holds. Passing `source_length * sizeof(UChar)` would
  // let ICU write past the end of the storage.
  const int32_t len = ucnv_toUChars(from.conv(), *destbuf, destbuf.capacity(),
                                    source, source_length, status);
  if (U_SUCCESS(*status)) {
    // Report exactly what ICU produced rather than assuming one UChar per
    // source byte.
    destbuf.SetLength(len);
    ret = ToBufferEndian(env, &destbuf);
  }
  return ret;
}
182
183
// Converts two-byte UChar `source` data to `toEncoding` using
// ucnv_fromUChars(), with "?" as the substitution sequence. `fromEncoding`
// is not used.
//
// `*status` is reset on entry and receives the ICU result code. Returns the
// converted bytes as a Buffer, or an empty MaybeLocal on failure.
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
                                     const char* fromEncoding,
                                     const char* toEncoding,
                                     const char* source,
                                     const size_t source_length,
                                     UErrorCode* status) {
  *status = U_ZERO_ERROR;

  MaybeStackBuffer<UChar> input;
  Converter to(toEncoding, "?");

  // Bring the source into a properly typed, host-endian UChar buffer.
  const size_t num_units = source_length / sizeof(UChar);
  CopySourceBuffer(&input, source, source_length, num_units);

  MaybeStackBuffer<char> output(num_units);
  const uint32_t written = ucnv_fromUChars(to.conv(), *output, num_units,
                                           *input, num_units, status);
  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  output.SetLength(written);
  return ToBufferEndian(env, &output);
}
204
205
2
// Converts UTF-8 `source` to two-byte UChar data with u_strFromUTF8().
// Neither `fromEncoding` nor `toEncoding` is used.
//
// The conversion is first attempted into the buffer's initial storage. If ICU
// reports U_BUFFER_OVERFLOW_ERROR, the buffer is grown to the length ICU
// asked for and the conversion is run once more.
//
// `*status` is reset on entry and receives the ICU result code. Returns the
// converted data as a Buffer, or an empty MaybeLocal on failure.
MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;

  MaybeStackBuffer<UChar> utf16;
  int32_t utf16_length;
  u_strFromUTF8(*utf16, utf16.capacity(), &utf16_length,
                source, source_length, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // utf16_length now holds the required size; retry with enough room.
    *status = U_ZERO_ERROR;
    utf16.AllocateSufficientStorage(utf16_length);
    u_strFromUTF8(*utf16, utf16_length, &utf16_length,
                  source, source_length, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  utf16.SetLength(utf16_length);
  return ToBufferEndian(env, &utf16);
}
232
233
2
// Converts two-byte UChar `source` data to UTF-8 with u_strToUTF8().
// Neither `fromEncoding` nor `toEncoding` is used.
//
// The conversion is first attempted into the buffer's initial storage. If ICU
// reports U_BUFFER_OVERFLOW_ERROR, the buffer is grown to the length ICU
// asked for and the conversion is run once more.
//
// `*status` is reset on entry and receives the ICU result code. Returns the
// converted bytes as a Buffer, or an empty MaybeLocal on failure.
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;

  const size_t num_units = source_length / sizeof(UChar);
  MaybeStackBuffer<UChar> utf16;
  MaybeStackBuffer<char> utf8;
  CopySourceBuffer(&utf16, source, source_length, num_units);

  int32_t utf8_length;
  u_strToUTF8(*utf8, utf8.capacity(), &utf8_length,
              *utf16, num_units, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // utf8_length now holds the required size; retry with enough room.
    *status = U_ZERO_ERROR;
    utf8.AllocateSufficientStorage(utf8_length);
    u_strToUTF8(*utf8, utf8_length, &utf8_length,
                *utf16, num_units, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  utf8.SetLength(utf8_length);
  return ToBufferEndian(env, &utf8);
}
263
264
20
const char* EncodingName(const enum encoding encoding) {
265

20
  switch (encoding) {
266
2
    case ASCII: return "us-ascii";
267
4
    case LATIN1: return "iso8859-1";
268
8
    case UCS2: return "utf16le";
269
6
    case UTF8: return "utf-8";
270
    default: return nullptr;
271
  }
272
}
273
274
22
bool SupportedEncoding(const enum encoding encoding) {
275
22
  switch (encoding) {
276
    case ASCII:
277
    case LATIN1:
278
    case UCS2:
279
20
    case UTF8: return true;
280
2
    default: return false;
281
  }
282
}
283
284
12
void Transcode(const FunctionCallbackInfo<Value>&args) {
285
12
  Environment* env = Environment::GetCurrent(args);
286
12
  Isolate* isolate = env->isolate();
287
12
  UErrorCode status = U_ZERO_ERROR;
288
  MaybeLocal<Object> result;
289
290
12
  ArrayBufferViewContents<char> input(args[0]);
291
12
  const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER);
292
12
  const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER);
293
294

12
  if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
295
10
    TranscodeFunc tfn = &Transcode;
296

10
    switch (fromEncoding) {
297
      case ASCII:
298
      case LATIN1:
299
4
        if (toEncoding == UCS2)
300
4
          tfn = &TranscodeToUcs2;
301
4
        break;
302
      case UTF8:
303
4
        if (toEncoding == UCS2)
304
2
          tfn = &TranscodeUcs2FromUtf8;
305
4
        break;
306
      case UCS2:
307
2
        switch (toEncoding) {
308
          case UCS2:
309
            tfn = &Transcode;
310
            break;
311
          case UTF8:
312
2
            tfn = &TranscodeUtf8FromUcs2;
313
2
            break;
314
          default:
315
            tfn = &TranscodeFromUcs2;
316
        }
317
2
        break;
318
      default:
319
        // This should not happen because of the SupportedEncoding checks
320
        ABORT();
321
    }
322
323
    result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding),
324
10
                 input.data(), input.length(), &status);
325
  } else {
326
2
    status = U_ILLEGAL_ARGUMENT_ERROR;
327
  }
328
329
12
  if (result.IsEmpty())
330
6
    return args.GetReturnValue().Set(status);
331
332
20
  return args.GetReturnValue().Set(result.ToLocalChecked());
333
}
334
335
2
void ICUErrorName(const FunctionCallbackInfo<Value>& args) {
336
2
  Environment* env = Environment::GetCurrent(args);
337
4
  CHECK(args[0]->IsInt32());
338
6
  UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value());
339
4
  args.GetReturnValue().Set(
340
4
      String::NewFromUtf8(env->isolate(),
341
                          u_errorName(status),
342
2
                          NewStringType::kNormal).ToLocalChecked());
343
2
}
344
345
}  // anonymous namespace
346
347
8
// Opens the ICU converter called `name` and takes ownership of it. Opening
// must succeed (enforced with CHECK). `sub`, when non-null, is installed as
// the substitution sequence.
Converter::Converter(const char* name, const char* sub) {
  UErrorCode status = U_ZERO_ERROR;
  UConverter* const opened = ucnv_open(name, &status);
  CHECK(U_SUCCESS(status));

  conv_.reset(opened);
  set_subst_chars(sub);
}
354
355
10871
// Adopts an already opened ICU converter. `sub`, when non-null, is installed
// as the substitution sequence.
Converter::Converter(UConverter* converter, const char* sub)
    : conv_(converter) {
  set_subst_chars(sub);
}
359
360
10879
void Converter::set_subst_chars(const char* sub) {
361
10879
  CHECK(conv_);
362
10879
  UErrorCode status = U_ZERO_ERROR;
363
10879
  if (sub != nullptr) {
364
2
    ucnv_setSubstChars(conv_.get(), sub, strlen(sub), &status);
365
2
    CHECK(U_SUCCESS(status));
366
  }
367
10879
}
368
369
7404
void Converter::reset() {
370
7404
  ucnv_reset(conv_.get());
371
7404
}
372
373
7822
size_t Converter::min_char_size() const {
374
7822
  CHECK(conv_);
375
7822
  return ucnv_getMinCharSize(conv_.get());
376
}
377
378
2
size_t Converter::max_char_size() const {
379
2
  CHECK(conv_);
380
2
  return ucnv_getMaxCharSize(conv_.get());
381
}
382
383
2
void ConverterObject::Has(const FunctionCallbackInfo<Value>& args) {
384
2
  Environment* env = Environment::GetCurrent(args);
385
386
2
  CHECK_GE(args.Length(), 1);
387
4
  Utf8Value label(env->isolate(), args[0]);
388
389
2
  UErrorCode status = U_ZERO_ERROR;
390
4
  ConverterPointer conv(ucnv_open(*label, &status));
391
6
  args.GetReturnValue().Set(!!U_SUCCESS(status));
392
2
}
393
394
10871
void ConverterObject::Create(const FunctionCallbackInfo<Value>& args) {
395
10871
  Environment* env = Environment::GetCurrent(args);
396
397
10871
  Local<ObjectTemplate> t = env->i18n_converter_template();
398
  Local<Object> obj;
399
32613
  if (!t->NewInstance(env->context()).ToLocal(&obj)) return;
400
401
10871
  CHECK_GE(args.Length(), 2);
402
21742
  Utf8Value label(env->isolate(), args[0]);
403
43484
  int flags = args[1]->Uint32Value(env->context()).ToChecked();
404
  bool fatal =
405
10871
      (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
406
407
10871
  UErrorCode status = U_ZERO_ERROR;
408
10871
  UConverter* conv = ucnv_open(*label, &status);
409
10871
  if (U_FAILURE(status))
410
    return;
411
412
10871
  if (fatal) {
413
7006
    status = U_ZERO_ERROR;
414
    ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
415
7006
                        nullptr, nullptr, nullptr, &status);
416
  }
417
418
10871
  new ConverterObject(env, obj, conv, flags);
419
21742
  args.GetReturnValue().Set(obj);
420
}
421
422
7822
void ConverterObject::Decode(const FunctionCallbackInfo<Value>& args) {
423
7822
  Environment* env = Environment::GetCurrent(args);
424
425
7822
  CHECK_GE(args.Length(), 3);  // Converter, Buffer, Flags
426
427
  ConverterObject* converter;
428
23232
  ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
429
7822
  ArrayBufferViewContents<char> input(args[1]);
430
31288
  int flags = args[2]->Uint32Value(env->context()).ToChecked();
431
432
7822
  UErrorCode status = U_ZERO_ERROR;
433
8056
  MaybeStackBuffer<UChar> result;
434
  MaybeLocal<Object> ret;
435
7822
  size_t limit = converter->min_char_size() * input.length();
436
7822
  if (limit > 0)
437
7793
    result.AllocateSufficientStorage(limit);
438
439
7822
  UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
440
7822
  auto cleanup = OnScopeLeave([&]() {
441
7822
    if (flush) {
442
      // Reset the converter state.
443
7404
      converter->set_bom_seen(false);
444
7404
      converter->reset();
445
    }
446
15878
  });
447
448
7822
  const char* source = input.data();
449
7822
  size_t source_length = input.length();
450
451
7822
  UChar* target = *result;
452
15644
  ucnv_toUnicode(converter->conv(),
453
                 &target,
454
7822
                 target + (limit * sizeof(UChar)),
455
                 &source,
456
                 source + source_length,
457
                 nullptr,
458
                 flush,
459
7822
                 &status);
460
461
7822
  if (U_SUCCESS(status)) {
462
7588
    bool omit_initial_bom = false;
463
7588
    if (limit > 0) {
464
7559
      result.SetLength(target - &result[0]);
465

22569
      if (result.length() > 0 &&
466
8098
          converter->unicode() &&
467

8839
          !converter->ignore_bom() &&
468
633
          !converter->bom_seen()) {
469
        // If the very first result in the stream is a BOM, and we are not
470
        // explicitly told to ignore it, then we mark it for discarding.
471
353
        if (result[0] == 0xFEFF)
472
23
          omit_initial_bom = true;
473
353
        converter->set_bom_seen(true);
474
      }
475
    }
476
7588
    ret = ToBufferEndian(env, &result);
477

7611
    if (omit_initial_bom && !ret.IsEmpty()) {
478
      // Peform `ret = ret.slice(2)`.
479
46
      CHECK(ret.ToLocalChecked()->IsUint8Array());
480
46
      Local<Uint8Array> orig_ret = ret.ToLocalChecked().As<Uint8Array>();
481
69
      ret = Buffer::New(env,
482
23
                        orig_ret->Buffer(),
483
23
                        orig_ret->ByteOffset() + 2,
484
69
                        orig_ret->ByteLength() - 2)
485
46
                            .FromMaybe(Local<Uint8Array>());
486
    }
487
7588
    if (!ret.IsEmpty())
488
15176
      args.GetReturnValue().Set(ret.ToLocalChecked());
489
7588
    return;
490
  }
491
492
702
  args.GetReturnValue().Set(status);
493
}
494
495
10871
// Wraps `converter` in a weak BaseObject bound to `wrap`. `flags` is stored
// as given; CONVERTER_FLAGS_UNICODE is added when the ICU converter type is
// UTF-8 or UTF-16 (either byte order).
ConverterObject::ConverterObject(
    Environment* env,
    Local<Object> wrap,
    UConverter* converter,
    int flags,
    const char* sub)
    : BaseObject(env, wrap),
      Converter(converter, sub),
      flags_(flags) {
  MakeWeak();

  const auto type = ucnv_getType(converter);
  const bool is_unicode = type == UCNV_UTF8 ||
                          type == UCNV_UTF16_BigEndian ||
                          type == UCNV_UTF16_LittleEndian;
  if (is_unicode)
    flags_ |= CONVERTER_FLAGS_UNICODE;
}
517
518
519
4278
bool InitializeICUDirectory(const std::string& path) {
520
4278
  UErrorCode status = U_ZERO_ERROR;
521
4278
  if (path.empty()) {
522
#ifdef NODE_HAVE_SMALL_ICU
523
    // install the 'small' data.
524
    udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status);
525
#else  // !NODE_HAVE_SMALL_ICU
526
    // no small data, so nothing to do.
527
#endif  // !NODE_HAVE_SMALL_ICU
528
  } else {
529
    u_setDataDirectory(path.c_str());
530
    u_init(&status);
531
  }
532
4278
  return status == U_ZERO_ERROR;
533
}
534
535
382
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
536
                  const char* input,
537
                  size_t length) {
538
382
  UErrorCode status = U_ZERO_ERROR;
539
382
  uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
540
382
  UIDNA* uidna = uidna_openUTS46(options, &status);
541
382
  if (U_FAILURE(status))
542
    return -1;
543
382
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
544
545
382
  int32_t len = uidna_nameToUnicodeUTF8(uidna,
546
                                        input, length,
547
382
                                        **buf, buf->capacity(),
548
                                        &info,
549
382
                                        &status);
550
551
  // Do not check info.errors like we do with ToASCII since ToUnicode always
552
  // returns a string, despite any possible errors that may have occurred.
553
554
382
  if (status == U_BUFFER_OVERFLOW_ERROR) {
555
    status = U_ZERO_ERROR;
556
    buf->AllocateSufficientStorage(len);
557
    len = uidna_nameToUnicodeUTF8(uidna,
558
                                  input, length,
559
                                  **buf, buf->capacity(),
560
                                  &info,
561
                                  &status);
562
  }
563
564
  // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
565
  // string, regardless of whether an error occurred.
566
567
382
  if (U_FAILURE(status)) {
568
    len = -1;
569
    buf->SetLength(0);
570
  } else {
571
382
    buf->SetLength(len);
572
  }
573
574
382
  uidna_close(uidna);
575
382
  return len;
576
}
577
578
12233
int32_t ToASCII(MaybeStackBuffer<char>* buf,
579
                const char* input,
580
                size_t length,
581
                enum idna_mode mode) {
582
12233
  UErrorCode status = U_ZERO_ERROR;
583
  uint32_t options =                  // CheckHyphens = false; handled later
584
    UIDNA_CHECK_BIDI |                // CheckBidi = true
585
    UIDNA_CHECK_CONTEXTJ |            // CheckJoiners = true
586
12233
    UIDNA_NONTRANSITIONAL_TO_ASCII;   // Nontransitional_Processing
587
12233
  if (mode == IDNA_STRICT) {
588
    options |= UIDNA_USE_STD3_RULES;  // UseSTD3ASCIIRules = beStrict
589
                                      // VerifyDnsLength = beStrict;
590
                                      //   handled later
591
  }
592
593
12233
  UIDNA* uidna = uidna_openUTS46(options, &status);
594
12233
  if (U_FAILURE(status))
595
    return -1;
596
12233
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
597
598
12233
  int32_t len = uidna_nameToASCII_UTF8(uidna,
599
                                       input, length,
600
12233
                                       **buf, buf->capacity(),
601
                                       &info,
602
12233
                                       &status);
603
604
12233
  if (status == U_BUFFER_OVERFLOW_ERROR) {
605
2
    status = U_ZERO_ERROR;
606
2
    buf->AllocateSufficientStorage(len);
607
2
    len = uidna_nameToASCII_UTF8(uidna,
608
                                 input, length,
609
2
                                 **buf, buf->capacity(),
610
                                 &info,
611
2
                                 &status);
612
  }
613
614
  // In UTS #46 which specifies ToASCII, certain error conditions are
615
  // configurable through options, and the WHATWG URL Standard promptly elects
616
  // to disable some of them to accommodate for real-world use cases.
617
  // Unfortunately, ICU4C's IDNA module does not support disabling some of
618
  // these options through `options` above, and thus continues throwing
619
  // unnecessary errors. To counter this situation, we just filter out the
620
  // errors that may have happened afterwards, before deciding whether to
621
  // return an error from this function.
622
623
  // CheckHyphens = false
624
  // (Specified in the current UTS #46 draft rev. 18.)
625
  // Refs:
626
  // - https://github.com/whatwg/url/issues/53
627
  // - https://github.com/whatwg/url/pull/309
628
  // - http://www.unicode.org/review/pri317/
629
  // - http://www.unicode.org/reports/tr46/tr46-18.html
630
  // - https://www.icann.org/news/announcement-2000-01-07-en
631
12233
  info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
632
12233
  info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
633
12233
  info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
634
635
12233
  if (mode != IDNA_STRICT) {
636
    // VerifyDnsLength = beStrict
637
12233
    info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
638
12233
    info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
639
12233
    info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
640
  }
641
642


12233
  if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
643
78
    len = -1;
644
78
    buf->SetLength(0);
645
  } else {
646
12155
    buf->SetLength(len);
647
  }
648
649
12233
  uidna_close(uidna);
650
12233
  return len;
651
}
652
653
189
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
654
189
  Environment* env = Environment::GetCurrent(args);
655
189
  CHECK_GE(args.Length(), 1);
656
567
  CHECK(args[0]->IsString());
657
378
  Utf8Value val(env->isolate(), args[0]);
658
659
378
  MaybeStackBuffer<char> buf;
660
189
  int32_t len = ToUnicode(&buf, *val, val.length());
661
662
189
  if (len < 0) {
663
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode");
664
  }
665
666
378
  args.GetReturnValue().Set(
667
378
      String::NewFromUtf8(env->isolate(),
668
189
                          *buf,
669
                          NewStringType::kNormal,
670
189
                          len).ToLocalChecked());
671
}
672
673
10135
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
674
10135
  Environment* env = Environment::GetCurrent(args);
675
10135
  CHECK_GE(args.Length(), 1);
676
30405
  CHECK(args[0]->IsString());
677
20261
  Utf8Value val(env->isolate(), args[0]);
678
  // optional arg
679
30405
  bool lenient = args[1]->BooleanValue(env->isolate());
680
10135
  enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
681
682
20261
  MaybeStackBuffer<char> buf;
683
10135
  int32_t len = ToASCII(&buf, *val, val.length(), mode);
684
685
10135
  if (len < 0) {
686
9
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII");
687
  }
688
689
20252
  args.GetReturnValue().Set(
690
20252
      String::NewFromUtf8(env->isolate(),
691
10126
                          *buf,
692
                          NewStringType::kNormal,
693
10126
                          len).ToLocalChecked());
694
}
695
696
// This is similar to wcwidth except that it takes the current unicode
697
// character properties database into consideration, allowing it to
698
// correctly calculate the column widths of things like emoji's and
699
// newer wide characters. wcwidth, on the other hand, uses a fixed
700
// algorithm that does not take things like emoji into proper
701
// consideration.
702
//
703
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by
704
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs
705
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't
706
// allow it to be input. Linux's PTY terminal prints control characters as
707
// Narrow rhombi.
708
//
709
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final
710
// consonants are 0-width when combined with initial consonants; otherwise they
711
// are technically Wide. But many terminals (including Konsole and
712
// VTE/GLib-based) implement all medials and finals as 0-width.
713
//
714
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width
715
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420
716
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223
717
29587
static int GetColumnWidth(UChar32 codepoint,
718
                          bool ambiguous_as_full_width = false) {
719
  // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
720
  // codepoint as being full width, wide, ambiguous, neutral, narrow,
721
  // or halfwidth.
722
29587
  const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);
723

29587
  switch (eaw) {
724
    case U_EA_FULLWIDTH:
725
    case U_EA_WIDE:
726
3544
      return 2;
727
    case U_EA_AMBIGUOUS:
728
      // See: http://www.unicode.org/reports/tr11/#Ambiguous for details
729
5410
      if (ambiguous_as_full_width) {
730
        return 2;
731
      }
732
      // If ambiguous_as_full_width is false:
733
      // Fall through
734
    case U_EA_NEUTRAL:
735
8615
      if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
736
        return 2;
737
      }
738
      // Fall through
739
    case U_EA_HALFWIDTH:
740
    case U_EA_NARROW:
741
    default:
742
      const auto zero_width_mask = U_GC_CC_MASK |  // C0/C1 control code
743
                                  U_GC_CF_MASK |  // Format control character
744
                                  U_GC_ME_MASK |  // Enclosing mark
745
26043
                                  U_GC_MN_MASK;   // Nonspacing mark
746

52307
      if (codepoint != 0x00AD &&  // SOFT HYPHEN is Cf but not zero-width
747
51766
          ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
748
51766
          u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
749
254
        return 0;
750
      }
751
25789
      return 1;
752
  }
753
}
754
755
// Returns the column width for the given String.
756
1673
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
757
1673
  Environment* env = Environment::GetCurrent(args);
758
5019
  CHECK(args[0]->IsString());
759
760
3346
  bool ambiguous_as_full_width = args[1]->IsTrue();
761

3346
  bool expand_emoji_sequence = !args[2]->IsBoolean() || args[2]->IsTrue();
762
763
3346
  TwoByteValue value(env->isolate(), args[0]);
764
  // reinterpret_cast is required by windows to compile
765
1673
  UChar* str = reinterpret_cast<UChar*>(*value);
766
  static_assert(sizeof(*str) == sizeof(**value),
767
                "sizeof(*str) == sizeof(**value)");
768
1673
  UChar32 c = 0;
769
  UChar32 p;
770
1673
  size_t n = 0;
771
1673
  uint32_t width = 0;
772
773
60847
  while (n < value.length()) {
774
29587
    p = c;
775


29587
    U16_NEXT(str, n, value.length(), c);
776
    // Don't count individual emoji codepoints that occur within an
777
    // emoji sequence. This is not necessarily foolproof. Some
778
    // environments display emoji sequences in the appropriate
779
    // condensed form (as a single emoji glyph), other environments
780
    // may not understand an emoji sequence and will display each
781
    // individual emoji separately. When this happens, the width
782
    // calculated will be off, and there's no reliable way of knowing
783
    // in advance if a particular sequence is going to be supported.
784
    // The expand_emoji_sequence option allows the caller to skip this
785
    // check and count each code within an emoji sequence separately.
786
    // https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
787

59174
    if (!expand_emoji_sequence &&
788

29587
        n > 0 && p == 0x200d &&  // 0x200d == ZWJ (zero width joiner)
789
        (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||
790
         u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) {
791
      continue;
792
    }
793
29587
    width += GetColumnWidth(c, ambiguous_as_full_width);
794
  }
795
3346
  args.GetReturnValue().Set(width);
796
1673
}
797
798
4593
void Initialize(Local<Object> target,
799
                Local<Value> unused,
800
                Local<Context> context,
801
                void* priv) {
802
4593
  Environment* env = Environment::GetCurrent(context);
803
4594
  env->SetMethod(target, "toUnicode", ToUnicode);
804
4594
  env->SetMethod(target, "toASCII", ToASCII);
805
4594
  env->SetMethod(target, "getStringWidth", GetStringWidth);
806
807
  // One-shot converters
808
4594
  env->SetMethod(target, "icuErrName", ICUErrorName);
809
4594
  env->SetMethod(target, "transcode", Transcode);
810
811
  // ConverterObject
812
  {
813
4594
    Local<FunctionTemplate> t = FunctionTemplate::New(env->isolate());
814
13780
    t->InstanceTemplate()->SetInternalFieldCount(
815
4594
        ConverterObject::kInternalFieldCount);
816
    Local<String> converter_string =
817
4593
        FIXED_ONE_BYTE_STRING(env->isolate(), "Converter");
818
4594
    t->SetClassName(converter_string);
819
4594
    env->set_i18n_converter_template(t->InstanceTemplate());
820
  }
821
822
4594
  env->SetMethod(target, "getConverter", ConverterObject::Create);
823
4593
  env->SetMethod(target, "decode", ConverterObject::Decode);
824
4594
  env->SetMethod(target, "hasConverter", ConverterObject::Has);
825
4594
}
826
827
}  // namespace i18n
828
}  // namespace node
829
830
4325
// Registers node::i18n::Initialize as the setup function of the internal
// module named `icu` (see NODE_MODULE_CONTEXT_AWARE_INTERNAL for the exact
// registration semantics).
NODE_MODULE_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize)
831
832
#endif  // NODE_HAVE_I18N_SUPPORT