GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/node_i18n.cc Lines: 334 379 88.1 %
Date: 2020-02-19 22:14:06 Branches: 142 220 64.5 %

Line Branch Exec Source
1
// Copyright Joyent, Inc. and other Node contributors.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a
4
// copy of this software and associated documentation files (the
5
// "Software"), to deal in the Software without restriction, including
6
// without limitation the rights to use, copy, modify, merge, publish,
7
// distribute, sublicense, and/or sell copies of the Software, and to permit
8
// persons to whom the Software is furnished to do so, subject to the
9
// following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included
12
// in all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22
/*
23
 * notes: by srl295
24
 *  - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data
25
 *     ( stubdata/libicudata.a ) containing nothing, no data, and it's also
26
 *    linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT
27
 *    macro names. That's the "english+root" data.
28
 *
29
 *    If icu_data_path is non-null, the user has provided a path and we assume
30
 *    it goes somewhere useful. We set that path in ICU, and exit.
31
 *    If icu_data_path is null, they haven't set a path and we want the
32
 *    "english+root" data.  We call
33
 *       udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...)
34
 *    to load up the english+root data.
35
 *
36
 *  - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full
37
 *    data. All of the variables and command line options for changing data at
38
 *    runtime are disabled, as they wouldn't fully override the internal data.
39
 *    See:  http://bugs.icu-project.org/trac/ticket/10924
40
 */
41
42
43
#include "node_i18n.h"
44
45
#if defined(NODE_HAVE_I18N_SUPPORT)
46
47
#include "base_object-inl.h"
48
#include "node.h"
49
#include "node_buffer.h"
50
#include "node_errors.h"
51
#include "node_internals.h"
52
#include "util-inl.h"
53
#include "v8.h"
54
55
#include <unicode/utypes.h>
56
#include <unicode/putil.h>
57
#include <unicode/uchar.h>
58
#include <unicode/uclean.h>
59
#include <unicode/udata.h>
60
#include <unicode/uidna.h>
61
#include <unicode/ucnv.h>
62
#include <unicode/utf8.h>
63
#include <unicode/utf16.h>
64
#include <unicode/timezone.h>
65
#include <unicode/ulocdata.h>
66
#include <unicode/uvernum.h>
67
#include <unicode/uversion.h>
68
#include <unicode/ustring.h>
69
70
#ifdef NODE_HAVE_SMALL_ICU
71
/* if this is defined, we have a 'secondary' entry point.
72
   compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
73
#define SMALL_ICUDATA_ENTRY_POINT \
74
  SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME)
75
#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff)
76
#ifndef U_LIB_SUFFIX_C_NAME
77
#define SMALL_DEF(major, suff) icusmdt##major##_dat
78
#else
79
#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat
80
#endif
81
82
extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
83
#endif
84
85
namespace node {
86
87
using v8::Context;
88
using v8::FunctionCallbackInfo;
89
using v8::HandleScope;
90
using v8::Int32;
91
using v8::Isolate;
92
using v8::Local;
93
using v8::MaybeLocal;
94
using v8::NewStringType;
95
using v8::Object;
96
using v8::ObjectTemplate;
97
using v8::String;
98
using v8::Uint8Array;
99
using v8::Value;
100
101
namespace i18n {
102
namespace {
103
104
template <typename T>
105
7341
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) {
106
7341
  MaybeLocal<Object> ret = Buffer::New(env, buf);
107

7341
  if (ret.IsEmpty())
108
    return ret;
109
110
  static_assert(sizeof(T) == 1 || sizeof(T) == 2,
111
                "Currently only one- or two-byte buffers are supported");
112

7341
  if (sizeof(T) > 1 && IsBigEndian()) {
113
    SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
114
    SwapBytes16(retbuf_data, retbuf_length);
115
  }
116
117
7341
  return ret;
118
}
119
120
struct Converter {
121
8
  explicit Converter(const char* name, const char* sub = nullptr)
122
8
      : conv(nullptr) {
123
8
    UErrorCode status = U_ZERO_ERROR;
124
8
    conv = ucnv_open(name, &status);
125
8
    CHECK(U_SUCCESS(status));
126
8
    if (sub != nullptr) {
127
2
      ucnv_setSubstChars(conv, sub, strlen(sub), &status);
128
    }
129
8
  }
130
131
10775
  explicit Converter(UConverter* converter,
132
10775
                     const char* sub = nullptr) : conv(converter) {
133
10775
    CHECK_NOT_NULL(conv);
134
10775
    UErrorCode status = U_ZERO_ERROR;
135
10775
    if (sub != nullptr) {
136
      ucnv_setSubstChars(conv, sub, strlen(sub), &status);
137
    }
138
10775
  }
139
140
21566
  ~Converter() {
141
10783
    ucnv_close(conv);
142
10783
  }
143
144
  UConverter* conv;
145
};
146
147
class ConverterObject : public BaseObject, Converter {
148
 public:
149
  enum ConverterFlags {
150
    CONVERTER_FLAGS_FLUSH      = 0x1,
151
    CONVERTER_FLAGS_FATAL      = 0x2,
152
    CONVERTER_FLAGS_IGNORE_BOM = 0x4
153
  };
154
155
21550
  ~ConverterObject() override = default;
156
157
2
  static void Has(const FunctionCallbackInfo<Value>& args) {
158
2
    Environment* env = Environment::GetCurrent(args);
159
4
    HandleScope scope(env->isolate());
160
161
2
    CHECK_GE(args.Length(), 1);
162
4
    Utf8Value label(env->isolate(), args[0]);
163
164
2
    UErrorCode status = U_ZERO_ERROR;
165
2
    UConverter* conv = ucnv_open(*label, &status);
166
6
    args.GetReturnValue().Set(!!U_SUCCESS(status));
167
2
    ucnv_close(conv);
168
2
  }
169
170
10775
  static void Create(const FunctionCallbackInfo<Value>& args) {
171
10775
    Environment* env = Environment::GetCurrent(args);
172
21550
    HandleScope scope(env->isolate());
173
174
10775
    Local<ObjectTemplate> t = ObjectTemplate::New(env->isolate());
175
10775
    t->SetInternalFieldCount(1);
176
    Local<Object> obj;
177
32325
    if (!t->NewInstance(env->context()).ToLocal(&obj)) return;
178
179
10775
    CHECK_GE(args.Length(), 2);
180
21550
    Utf8Value label(env->isolate(), args[0]);
181
43100
    int flags = args[1]->Uint32Value(env->context()).ToChecked();
182
    bool fatal =
183
10775
        (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
184
    bool ignoreBOM =
185
10775
        (flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM;
186
187
10775
    UErrorCode status = U_ZERO_ERROR;
188
10775
    UConverter* conv = ucnv_open(*label, &status);
189
10775
    if (U_FAILURE(status))
190
      return;
191
192
10775
    if (fatal) {
193
7006
      status = U_ZERO_ERROR;
194
      ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
195
7006
                          nullptr, nullptr, nullptr, &status);
196
    }
197
198
10775
    new ConverterObject(env, obj, conv, ignoreBOM);
199
21550
    args.GetReturnValue().Set(obj);
200
  }
201
202
7565
  static void Decode(const FunctionCallbackInfo<Value>& args) {
203
7565
    Environment* env = Environment::GetCurrent(args);
204
205
7565
    CHECK_GE(args.Length(), 3);  // Converter, Buffer, Flags
206
207
    ConverterObject* converter;
208
22461
    ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
209
7565
    ArrayBufferViewContents<char> input(args[1]);
210
30260
    int flags = args[2]->Uint32Value(env->context()).ToChecked();
211
212
7565
    UErrorCode status = U_ZERO_ERROR;
213
7799
    MaybeStackBuffer<UChar> result;
214
    MaybeLocal<Object> ret;
215
7565
    size_t limit = ucnv_getMinCharSize(converter->conv) * input.length();
216
7565
    if (limit > 0)
217
7539
      result.AllocateSufficientStorage(limit);
218
219
7565
    UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
220
7565
    auto cleanup = OnScopeLeave([&]() {
221
7565
      if (flush) {
222
        // Reset the converter state.
223
7147
        converter->bomSeen_ = false;
224
7147
        ucnv_reset(converter->conv);
225
      }
226
15364
    });
227
228
7565
    const char* source = input.data();
229
7565
    size_t source_length = input.length();
230
231
7565
    UChar* target = *result;
232
15130
    ucnv_toUnicode(converter->conv,
233
7565
                   &target, target + (limit * sizeof(UChar)),
234
                   &source, source + source_length,
235
7565
                   nullptr, flush, &status);
236
237
7565
    if (U_SUCCESS(status)) {
238
7331
      bool omit_initial_bom = false;
239
7331
      if (limit > 0) {
240
7305
        result.SetLength(target - &result[0]);
241

21807
        if (result.length() > 0 &&
242
7590
            converter->unicode_ &&
243

8077
            !converter->ignoreBOM_ &&
244
379
            !converter->bomSeen_) {
245
          // If the very first result in the stream is a BOM, and we are not
246
          // explicitly told to ignore it, then we mark it for discarding.
247
99
          if (result[0] == 0xFEFF) {
248
23
            omit_initial_bom = true;
249
          }
250
99
          converter->bomSeen_ = true;
251
        }
252
      }
253
7331
      ret = ToBufferEndian(env, &result);
254

7354
      if (omit_initial_bom && !ret.IsEmpty()) {
255
        // Peform `ret = ret.slice(2)`.
256
46
        CHECK(ret.ToLocalChecked()->IsUint8Array());
257
46
        Local<Uint8Array> orig_ret = ret.ToLocalChecked().As<Uint8Array>();
258
69
        ret = Buffer::New(env,
259
23
                          orig_ret->Buffer(),
260
23
                          orig_ret->ByteOffset() + 2,
261
69
                          orig_ret->ByteLength() - 2)
262
46
                              .FromMaybe(Local<Uint8Array>());
263
      }
264
7331
      if (!ret.IsEmpty())
265
14662
        args.GetReturnValue().Set(ret.ToLocalChecked());
266
7331
      return;
267
    }
268
269
702
    args.GetReturnValue().Set(status);
270
  }
271
272
  SET_NO_MEMORY_INFO()
273
  SET_MEMORY_INFO_NAME(ConverterObject)
274
  SET_SELF_SIZE(ConverterObject)
275
276
 protected:
277
10775
  ConverterObject(Environment* env,
278
                  Local<Object> wrap,
279
                  UConverter* converter,
280
                  bool ignoreBOM,
281
10775
                  const char* sub = nullptr) :
282
                  BaseObject(env, wrap),
283
                  Converter(converter, sub),
284
10775
                  ignoreBOM_(ignoreBOM) {
285
10775
    MakeWeak();
286
287
10775
    switch (ucnv_getType(converter)) {
288
      case UCNV_UTF8:
289
      case UCNV_UTF16_BigEndian:
290
      case UCNV_UTF16_LittleEndian:
291
310
        unicode_ = true;
292
310
        break;
293
      default:
294
10465
        unicode_ = false;
295
    }
296
10775
  }
297
298
 private:
299
  bool unicode_ = false;     // True if this is a Unicode converter
300
  bool ignoreBOM_ = false;   // True if the BOM should be ignored on Unicode
301
  bool bomSeen_ = false;     // True if the BOM has been seen
302
};
303
304
// One-Shot Converters
305
306
2
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
307
                      const char* data,
308
                      const size_t length,
309
                      const size_t length_in_chars) {
310
2
  dest->AllocateSufficientStorage(length_in_chars);
311
2
  char* dst = reinterpret_cast<char*>(**dest);
312
2
  memcpy(dst, data, length);
313
2
  if (IsBigEndian()) {
314
    SwapBytes16(dst, length);
315
  }
316
2
}
317
318
// Common signature of the one-shot transcoding helpers: convert
// |source_length| bytes at |source| from |fromEncoding| to |toEncoding|,
// reporting the ICU result code through |status|.
using TranscodeFunc = MaybeLocal<Object> (*)(Environment* env,
                                             const char* fromEncoding,
                                             const char* toEncoding,
                                             const char* source,
                                             const size_t source_length,
                                             UErrorCode* status);
324
325
2
// Generic one-shot conversion between two ICU encodings using
// ucnv_convertEx(). Characters that cannot be represented in |toEncoding| are
// replaced with "?". Returns an empty MaybeLocal when ICU reports a failure
// through |status|.
MaybeLocal<Object> Transcode(Environment* env,
                             const char* fromEncoding,
                             const char* toEncoding,
                             const char* source,
                             const size_t source_length,
                             UErrorCode* status) {
  *status = U_ZERO_ERROR;

  MaybeStackBuffer<char> output;
  Converter to(toEncoding, "?");
  Converter from(fromEncoding);

  // Output capacity: the target converter's maximum bytes per character for
  // every input byte.
  const uint32_t capacity = source_length * ucnv_getMaxCharSize(to.conv);
  output.AllocateSufficientStorage(capacity);

  char* write_pos = *output;
  const char* const output_end = write_pos + capacity;
  const char* const source_end = source + source_length;
  ucnv_convertEx(to.conv, from.conv, &write_pos, output_end,
                 &source, source_end, nullptr, nullptr,
                 nullptr, nullptr, true, true, status);

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  output.SetLength(write_pos - &output[0]);
  return ToBufferEndian(env, &output);
}
348
349
4
// One-shot conversion from |fromEncoding| to UCS-2 using ucnv_toUChars().
// |toEncoding| is unused. Returns an empty MaybeLocal when ICU reports a
// failure through |status|.
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
                                   const char* fromEncoding,
                                   const char* toEncoding,
                                   const char* source,
                                   const size_t source_length,
                                   UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  MaybeStackBuffer<UChar> destbuf(source_length);
  Converter from(fromEncoding);
  // ucnv_toUChars() takes the destination capacity in UChars. Passing the
  // real capacity of |destbuf| (rather than a byte count) keeps ICU, including
  // its optional NUL terminator, inside the buffer.
  const int32_t len = ucnv_toUChars(from.conv, *destbuf, destbuf.capacity(),
                                    source, source_length, status);
  if (U_SUCCESS(*status)) {
    destbuf.SetLength(len);
    ret = ToBufferEndian(env, &destbuf);
  }
  return ret;
}
366
367
// One-shot conversion from UCS-2 to |toEncoding| using ucnv_fromUChars().
// |fromEncoding| is unused. Unrepresentable characters are replaced with "?".
// Returns an empty MaybeLocal when ICU reports a failure through |status|.
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
                                     const char* fromEncoding,
                                     const char* toEncoding,
                                     const char* source,
                                     const size_t source_length,
                                     UErrorCode* status) {
  *status = U_ZERO_ERROR;

  MaybeStackBuffer<UChar> utf16_input;
  Converter to(toEncoding, "?");

  // Stage the raw bytes as UChars before handing them to ICU.
  const size_t unit_count = source_length / sizeof(UChar);
  CopySourceBuffer(&utf16_input, source, source_length, unit_count);

  MaybeStackBuffer<char> output(unit_count);
  const uint32_t written = ucnv_fromUChars(to.conv, *output, unit_count,
                                           *utf16_input, unit_count, status);

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  output.SetLength(written);
  return ToBufferEndian(env, &output);
}
388
389
2
// One-shot conversion from UTF-8 to UCS-2 using u_strFromUTF8(). The encoding
// name arguments are unused. The first attempt uses the buffer's existing
// capacity; on U_BUFFER_OVERFLOW_ERROR the buffer is grown to the length ICU
// reported and the conversion is retried once. Returns an empty MaybeLocal
// when ICU reports a failure through |status|.
MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> utf16;
  int32_t utf16_length;

  u_strFromUTF8(*utf16, utf16.capacity(), &utf16_length,
                source, source_length, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // |utf16_length| now holds the required size; grow and try again.
    *status = U_ZERO_ERROR;
    utf16.AllocateSufficientStorage(utf16_length);
    u_strFromUTF8(*utf16, utf16_length, &utf16_length,
                  source, source_length, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  utf16.SetLength(utf16_length);
  return ToBufferEndian(env, &utf16);
}
416
417
2
// One-shot conversion from UCS-2 to UTF-8 using u_strToUTF8(). The encoding
// name arguments are unused. The first attempt uses the buffer's existing
// capacity; on U_BUFFER_OVERFLOW_ERROR the buffer is grown to the length ICU
// reported and the conversion is retried once. Returns an empty MaybeLocal
// when ICU reports a failure through |status|.
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  const size_t unit_count = source_length / sizeof(UChar);
  int32_t utf8_length;
  MaybeStackBuffer<UChar> utf16_input;
  MaybeStackBuffer<char> utf8;

  // Stage the raw bytes as UChars before handing them to ICU.
  CopySourceBuffer(&utf16_input, source, source_length, unit_count);

  u_strToUTF8(*utf8, utf8.capacity(), &utf8_length,
              *utf16_input, unit_count, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // |utf8_length| now holds the required size; grow and try again.
    *status = U_ZERO_ERROR;
    utf8.AllocateSufficientStorage(utf8_length);
    u_strToUTF8(*utf8, utf8_length, &utf8_length, *utf16_input,
                unit_count, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  utf8.SetLength(utf8_length);
  return ToBufferEndian(env, &utf8);
}
447
448
20
const char* EncodingName(const enum encoding encoding) {
449

20
  switch (encoding) {
450
2
    case ASCII: return "us-ascii";
451
4
    case LATIN1: return "iso8859-1";
452
8
    case UCS2: return "utf16le";
453
6
    case UTF8: return "utf-8";
454
    default: return nullptr;
455
  }
456
}
457
458
22
bool SupportedEncoding(const enum encoding encoding) {
459
22
  switch (encoding) {
460
    case ASCII:
461
    case LATIN1:
462
    case UCS2:
463
20
    case UTF8: return true;
464
2
    default: return false;
465
  }
466
}
467
468
12
void Transcode(const FunctionCallbackInfo<Value>&args) {
469
12
  Environment* env = Environment::GetCurrent(args);
470
12
  Isolate* isolate = env->isolate();
471
12
  UErrorCode status = U_ZERO_ERROR;
472
  MaybeLocal<Object> result;
473
474
12
  ArrayBufferViewContents<char> input(args[0]);
475
12
  const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER);
476
12
  const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER);
477
478

12
  if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
479
10
    TranscodeFunc tfn = &Transcode;
480

10
    switch (fromEncoding) {
481
      case ASCII:
482
      case LATIN1:
483
4
        if (toEncoding == UCS2)
484
4
          tfn = &TranscodeToUcs2;
485
4
        break;
486
      case UTF8:
487
4
        if (toEncoding == UCS2)
488
2
          tfn = &TranscodeUcs2FromUtf8;
489
4
        break;
490
      case UCS2:
491
2
        switch (toEncoding) {
492
          case UCS2:
493
            tfn = &Transcode;
494
            break;
495
          case UTF8:
496
2
            tfn = &TranscodeUtf8FromUcs2;
497
2
            break;
498
          default:
499
            tfn = &TranscodeFromUcs2;
500
        }
501
2
        break;
502
      default:
503
        // This should not happen because of the SupportedEncoding checks
504
        ABORT();
505
    }
506
507
    result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding),
508
10
                 input.data(), input.length(), &status);
509
  } else {
510
2
    status = U_ILLEGAL_ARGUMENT_ERROR;
511
  }
512
513
12
  if (result.IsEmpty())
514
6
    return args.GetReturnValue().Set(status);
515
516
20
  return args.GetReturnValue().Set(result.ToLocalChecked());
517
}
518
519
2
void ICUErrorName(const FunctionCallbackInfo<Value>& args) {
520
2
  Environment* env = Environment::GetCurrent(args);
521
4
  CHECK(args[0]->IsInt32());
522
6
  UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value());
523
4
  args.GetReturnValue().Set(
524
4
      String::NewFromUtf8(env->isolate(),
525
                          u_errorName(status),
526
2
                          NewStringType::kNormal).ToLocalChecked());
527
2
}
528
529
}  // anonymous namespace
530
531
4139
bool InitializeICUDirectory(const std::string& path) {
532
4139
  UErrorCode status = U_ZERO_ERROR;
533
4139
  if (path.empty()) {
534
#ifdef NODE_HAVE_SMALL_ICU
535
    // install the 'small' data.
536
    udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status);
537
#else  // !NODE_HAVE_SMALL_ICU
538
    // no small data, so nothing to do.
539
#endif  // !NODE_HAVE_SMALL_ICU
540
  } else {
541
    u_setDataDirectory(path.c_str());
542
    u_init(&status);
543
  }
544
4139
  return status == U_ZERO_ERROR;
545
}
546
547
382
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
548
                  const char* input,
549
                  size_t length) {
550
382
  UErrorCode status = U_ZERO_ERROR;
551
382
  uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
552
382
  UIDNA* uidna = uidna_openUTS46(options, &status);
553
382
  if (U_FAILURE(status))
554
    return -1;
555
382
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
556
557
382
  int32_t len = uidna_nameToUnicodeUTF8(uidna,
558
                                        input, length,
559
382
                                        **buf, buf->capacity(),
560
                                        &info,
561
382
                                        &status);
562
563
  // Do not check info.errors like we do with ToASCII since ToUnicode always
564
  // returns a string, despite any possible errors that may have occurred.
565
566
382
  if (status == U_BUFFER_OVERFLOW_ERROR) {
567
    status = U_ZERO_ERROR;
568
    buf->AllocateSufficientStorage(len);
569
    len = uidna_nameToUnicodeUTF8(uidna,
570
                                  input, length,
571
                                  **buf, buf->capacity(),
572
                                  &info,
573
                                  &status);
574
  }
575
576
  // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
577
  // string, regardless of whether an error occurred.
578
579
382
  if (U_FAILURE(status)) {
580
    len = -1;
581
    buf->SetLength(0);
582
  } else {
583
382
    buf->SetLength(len);
584
  }
585
586
382
  uidna_close(uidna);
587
382
  return len;
588
}
589
590
12035
int32_t ToASCII(MaybeStackBuffer<char>* buf,
591
                const char* input,
592
                size_t length,
593
                enum idna_mode mode) {
594
12035
  UErrorCode status = U_ZERO_ERROR;
595
  uint32_t options =                  // CheckHyphens = false; handled later
596
    UIDNA_CHECK_BIDI |                // CheckBidi = true
597
    UIDNA_CHECK_CONTEXTJ |            // CheckJoiners = true
598
12035
    UIDNA_NONTRANSITIONAL_TO_ASCII;   // Nontransitional_Processing
599
12035
  if (mode == IDNA_STRICT) {
600
    options |= UIDNA_USE_STD3_RULES;  // UseSTD3ASCIIRules = beStrict
601
                                      // VerifyDnsLength = beStrict;
602
                                      //   handled later
603
  }
604
605
12035
  UIDNA* uidna = uidna_openUTS46(options, &status);
606
12035
  if (U_FAILURE(status))
607
    return -1;
608
12035
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
609
610
12035
  int32_t len = uidna_nameToASCII_UTF8(uidna,
611
                                       input, length,
612
12035
                                       **buf, buf->capacity(),
613
                                       &info,
614
12035
                                       &status);
615
616
12035
  if (status == U_BUFFER_OVERFLOW_ERROR) {
617
2
    status = U_ZERO_ERROR;
618
2
    buf->AllocateSufficientStorage(len);
619
2
    len = uidna_nameToASCII_UTF8(uidna,
620
                                 input, length,
621
2
                                 **buf, buf->capacity(),
622
                                 &info,
623
2
                                 &status);
624
  }
625
626
  // In UTS #46 which specifies ToASCII, certain error conditions are
627
  // configurable through options, and the WHATWG URL Standard promptly elects
628
  // to disable some of them to accommodate for real-world use cases.
629
  // Unfortunately, ICU4C's IDNA module does not support disabling some of
630
  // these options through `options` above, and thus continues throwing
631
  // unnecessary errors. To counter this situation, we just filter out the
632
  // errors that may have happened afterwards, before deciding whether to
633
  // return an error from this function.
634
635
  // CheckHyphens = false
636
  // (Specified in the current UTS #46 draft rev. 18.)
637
  // Refs:
638
  // - https://github.com/whatwg/url/issues/53
639
  // - https://github.com/whatwg/url/pull/309
640
  // - http://www.unicode.org/review/pri317/
641
  // - http://www.unicode.org/reports/tr46/tr46-18.html
642
  // - https://www.icann.org/news/announcement-2000-01-07-en
643
12035
  info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
644
12035
  info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
645
12035
  info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
646
647
12035
  if (mode != IDNA_STRICT) {
648
    // VerifyDnsLength = beStrict
649
12035
    info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
650
12035
    info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
651
12035
    info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
652
  }
653
654


12035
  if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
655
78
    len = -1;
656
78
    buf->SetLength(0);
657
  } else {
658
11957
    buf->SetLength(len);
659
  }
660
661
12035
  uidna_close(uidna);
662
12035
  return len;
663
}
664
665
189
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
666
189
  Environment* env = Environment::GetCurrent(args);
667
189
  CHECK_GE(args.Length(), 1);
668
567
  CHECK(args[0]->IsString());
669
378
  Utf8Value val(env->isolate(), args[0]);
670
671
378
  MaybeStackBuffer<char> buf;
672
189
  int32_t len = ToUnicode(&buf, *val, val.length());
673
674
189
  if (len < 0) {
675
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode");
676
  }
677
678
378
  args.GetReturnValue().Set(
679
378
      String::NewFromUtf8(env->isolate(),
680
189
                          *buf,
681
                          NewStringType::kNormal,
682
189
                          len).ToLocalChecked());
683
}
684
685
9980
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
686
9980
  Environment* env = Environment::GetCurrent(args);
687
9980
  CHECK_GE(args.Length(), 1);
688
29940
  CHECK(args[0]->IsString());
689
19951
  Utf8Value val(env->isolate(), args[0]);
690
  // optional arg
691
29940
  bool lenient = args[1]->BooleanValue(env->isolate());
692
9980
  enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
693
694
19951
  MaybeStackBuffer<char> buf;
695
9980
  int32_t len = ToASCII(&buf, *val, val.length(), mode);
696
697
9980
  if (len < 0) {
698
9
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII");
699
  }
700
701
19942
  args.GetReturnValue().Set(
702
19942
      String::NewFromUtf8(env->isolate(),
703
9971
                          *buf,
704
                          NewStringType::kNormal,
705
9971
                          len).ToLocalChecked());
706
}
707
708
// This is similar to wcwidth except that it takes the current unicode
709
// character properties database into consideration, allowing it to
710
// correctly calculate the column widths of things like emoji's and
711
// newer wide characters. wcwidth, on the other hand, uses a fixed
712
// algorithm that does not take things like emoji into proper
713
// consideration.
714
//
715
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by
716
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs
717
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't
718
// allow it to be input. Linux's PTY terminal prints control characters as
719
// Narrow rhombi.
720
//
721
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final
722
// consonants are 0-width when combined with initial consonants; otherwise they
723
// are technically Wide. But many terminals (including Konsole and
724
// VTE/GLib-based) implement all medials and finals as 0-width.
725
//
726
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width
727
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420
728
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223
729
29219
static int GetColumnWidth(UChar32 codepoint,
730
                          bool ambiguous_as_full_width = false) {
731
  // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
732
  // codepoint as being full width, wide, ambiguous, neutral, narrow,
733
  // or halfwidth.
734
29219
  const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);
735

29219
  switch (eaw) {
736
    case U_EA_FULLWIDTH:
737
    case U_EA_WIDE:
738
2579
      return 2;
739
    case U_EA_AMBIGUOUS:
740
      // See: http://www.unicode.org/reports/tr11/#Ambiguous for details
741
5661
      if (ambiguous_as_full_width) {
742
        return 2;
743
      }
744
      // If ambiguous_as_full_width is false:
745
      // Fall through
746
    case U_EA_NEUTRAL:
747
8437
      if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
748
        return 2;
749
      }
750
      // Fall through
751
    case U_EA_HALFWIDTH:
752
    case U_EA_NARROW:
753
    default:
754
      const auto zero_width_mask = U_GC_CC_MASK |  // C0/C1 control code
755
                                  U_GC_CF_MASK |  // Format control character
756
                                  U_GC_ME_MASK |  // Enclosing mark
757
26640
                                  U_GC_MN_MASK;   // Nonspacing mark
758

53453
      if (codepoint != 0x00AD &&  // SOFT HYPHEN is Cf but not zero-width
759
53011
          ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
760
53011
          u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
761
205
        return 0;
762
      }
763
26435
      return 1;
764
  }
765
}
766
767
// Returns the column width for the given String.
768
1586
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
769
1586
  Environment* env = Environment::GetCurrent(args);
770
4758
  CHECK(args[0]->IsString());
771
772
3172
  bool ambiguous_as_full_width = args[1]->IsTrue();
773

3172
  bool expand_emoji_sequence = !args[2]->IsBoolean() || args[2]->IsTrue();
774
775
3172
  TwoByteValue value(env->isolate(), args[0]);
776
  // reinterpret_cast is required by windows to compile
777
1586
  UChar* str = reinterpret_cast<UChar*>(*value);
778
  static_assert(sizeof(*str) == sizeof(**value),
779
                "sizeof(*str) == sizeof(**value)");
780
1586
  UChar32 c = 0;
781
  UChar32 p;
782
1586
  size_t n = 0;
783
1586
  uint32_t width = 0;
784
785
60024
  while (n < value.length()) {
786
29219
    p = c;
787


29219
    U16_NEXT(str, n, value.length(), c);
788
    // Don't count individual emoji codepoints that occur within an
789
    // emoji sequence. This is not necessarily foolproof. Some
790
    // environments display emoji sequences in the appropriate
791
    // condensed form (as a single emoji glyph), other environments
792
    // may not understand an emoji sequence and will display each
793
    // individual emoji separately. When this happens, the width
794
    // calculated will be off, and there's no reliable way of knowing
795
    // in advance if a particular sequence is going to be supported.
796
    // The expand_emoji_sequence option allows the caller to skip this
797
    // check and count each code within an emoji sequence separately.
798
    // https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
799

58438
    if (!expand_emoji_sequence &&
800

29219
        n > 0 && p == 0x200d &&  // 0x200d == ZWJ (zero width joiner)
801
        (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||
802
         u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) {
803
      continue;
804
    }
805
29219
    width += GetColumnWidth(c, ambiguous_as_full_width);
806
  }
807
3172
  args.GetReturnValue().Set(width);
808
1586
}
809
810
4377
void Initialize(Local<Object> target,
811
                Local<Value> unused,
812
                Local<Context> context,
813
                void* priv) {
814
4377
  Environment* env = Environment::GetCurrent(context);
815
4377
  env->SetMethod(target, "toUnicode", ToUnicode);
816
4377
  env->SetMethod(target, "toASCII", ToASCII);
817
4377
  env->SetMethod(target, "getStringWidth", GetStringWidth);
818
819
  // One-shot converters
820
4377
  env->SetMethod(target, "icuErrName", ICUErrorName);
821
4377
  env->SetMethod(target, "transcode", Transcode);
822
823
  // ConverterObject
824
4377
  env->SetMethod(target, "getConverter", ConverterObject::Create);
825
4377
  env->SetMethod(target, "decode", ConverterObject::Decode);
826
4377
  env->SetMethod(target, "hasConverter", ConverterObject::Has);
827
4377
}
828
829
}  // namespace i18n
830
}  // namespace node
831
832
4185
// Registers node::i18n::Initialize as the initializer of the internal "icu"
// module.
NODE_MODULE_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize)
833
834
#endif  // NODE_HAVE_I18N_SUPPORT