GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage/nodes/benchmark/out/../src/node_i18n.cc Lines: 313 359 87.2 %
Date: 2019-01-07 12:15:22 Branches: 147 222 66.2 %

Line Branch Exec Source
1
// Copyright Joyent, Inc. and other Node contributors.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a
4
// copy of this software and associated documentation files (the
5
// "Software"), to deal in the Software without restriction, including
6
// without limitation the rights to use, copy, modify, merge, publish,
7
// distribute, sublicense, and/or sell copies of the Software, and to permit
8
// persons to whom the Software is furnished to do so, subject to the
9
// following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included
12
// in all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22
/*
23
 * notes: by srl295
24
 *  - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data
25
 *     ( stubdata/libicudata.a ) containing nothing, no data, and it's also
26
 *    linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT
27
 *    macro names. That's the "english+root" data.
28
 *
29
 *    If icu_data_path is non-null, the user has provided a path and we assume
30
 *    it goes somewhere useful. We set that path in ICU, and exit.
31
 *    If icu_data_path is null, they haven't set a path and we want the
32
 *    "english+root" data.  We call
33
 *       udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...)
34
 *    to load up the english+root data.
35
 *
36
 *  - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full
37
 *    data. All of the variables and command line options for changing data at
38
 *    runtime are disabled, as they wouldn't fully override the internal data.
39
 *    See:  http://bugs.icu-project.org/trac/ticket/10924
40
 */
41
42
43
#include "node_i18n.h"
44
45
#if defined(NODE_HAVE_I18N_SUPPORT)
46
47
#include "node.h"
48
#include "node_buffer.h"
49
#include "node_errors.h"
50
#include "env-inl.h"
51
#include "util-inl.h"
52
#include "base_object-inl.h"
53
#include "v8.h"
54
55
#include <unicode/utypes.h>
56
#include <unicode/putil.h>
57
#include <unicode/uchar.h>
58
#include <unicode/uclean.h>
59
#include <unicode/udata.h>
60
#include <unicode/uidna.h>
61
#include <unicode/ucnv.h>
62
#include <unicode/utf8.h>
63
#include <unicode/utf16.h>
64
#include <unicode/timezone.h>
65
#include <unicode/ulocdata.h>
66
#include <unicode/uvernum.h>
67
#include <unicode/uversion.h>
68
#include <unicode/ustring.h>
69
70
#ifdef NODE_HAVE_SMALL_ICU
71
/* if this is defined, we have a 'secondary' entry point.
72
   compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
73
#define SMALL_ICUDATA_ENTRY_POINT \
74
  SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME)
75
#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff)
76
#ifndef U_LIB_SUFFIX_C_NAME
77
#define SMALL_DEF(major, suff) icusmdt##major##_dat
78
#else
79
#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat
80
#endif
81
82
extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
83
#endif
84
85
namespace node {
86
87
using v8::Context;
88
using v8::FunctionCallbackInfo;
89
using v8::HandleScope;
90
using v8::Int32;
91
using v8::Isolate;
92
using v8::Local;
93
using v8::MaybeLocal;
94
using v8::NewStringType;
95
using v8::Object;
96
using v8::ObjectTemplate;
97
using v8::String;
98
using v8::Value;
99
100
namespace i18n {
101
namespace {
102
103
template <typename T>
104
519
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) {
105
519
  MaybeLocal<Object> ret = Buffer::New(env, buf);
106

519
  if (ret.IsEmpty())
107
    return ret;
108
109
  static_assert(sizeof(T) == 1 || sizeof(T) == 2,
110
                "Currently only one- or two-byte buffers are supported");
111

519
  if (sizeof(T) > 1 && IsBigEndian()) {
112
    SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
113
    SwapBytes16(retbuf_data, retbuf_length);
114
  }
115
116
519
  return ret;
117
}
118
119
struct Converter {
120
564
  explicit Converter(const char* name, const char* sub = nullptr)
121
564
      : conv(nullptr) {
122
564
    UErrorCode status = U_ZERO_ERROR;
123
564
    conv = ucnv_open(name, &status);
124
564
    CHECK(U_SUCCESS(status));
125
564
    if (sub != nullptr) {
126
2
      ucnv_setSubstChars(conv, sub, strlen(sub), &status);
127
    }
128
564
  }
129
130
156
  explicit Converter(UConverter* converter,
131
156
                     const char* sub = nullptr) : conv(converter) {
132
156
    CHECK_NOT_NULL(conv);
133
156
    UErrorCode status = U_ZERO_ERROR;
134
156
    if (sub != nullptr) {
135
      ucnv_setSubstChars(conv, sub, strlen(sub), &status);
136
    }
137
156
  }
138
139
720
  ~Converter() {
140
720
    ucnv_close(conv);
141
720
  }
142
143
  UConverter* conv;
144
};
145
146
class ConverterObject : public BaseObject, Converter {
147
 public:
148
  enum ConverterFlags {
149
    CONVERTER_FLAGS_FLUSH      = 0x1,
150
    CONVERTER_FLAGS_FATAL      = 0x2,
151
    CONVERTER_FLAGS_IGNORE_BOM = 0x4
152
  };
153
154
312
  ~ConverterObject() override {}
155
156
  static void Has(const FunctionCallbackInfo<Value>& args) {
157
    Environment* env = Environment::GetCurrent(args);
158
    HandleScope scope(env->isolate());
159
160
    CHECK_GE(args.Length(), 1);
161
    Utf8Value label(env->isolate(), args[0]);
162
163
    UErrorCode status = U_ZERO_ERROR;
164
    UConverter* conv = ucnv_open(*label, &status);
165
    args.GetReturnValue().Set(!!U_SUCCESS(status));
166
    ucnv_close(conv);
167
  }
168
169
156
  static void Create(const FunctionCallbackInfo<Value>& args) {
170
156
    Environment* env = Environment::GetCurrent(args);
171
156
    HandleScope scope(env->isolate());
172
173
156
    CHECK_GE(args.Length(), 2);
174
312
    Utf8Value label(env->isolate(), args[0]);
175
624
    int flags = args[1]->Uint32Value(env->context()).ToChecked();
176
    bool fatal =
177
156
        (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
178
    bool ignoreBOM =
179
156
        (flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM;
180
181
156
    UErrorCode status = U_ZERO_ERROR;
182
156
    UConverter* conv = ucnv_open(*label, &status);
183
156
    if (U_FAILURE(status))
184
156
      return;
185
186
156
    if (fatal) {
187
50
      status = U_ZERO_ERROR;
188
      ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
189
50
                          nullptr, nullptr, nullptr, &status);
190
    }
191
192
156
    Local<ObjectTemplate> t = ObjectTemplate::New(env->isolate());
193
156
    t->SetInternalFieldCount(1);
194
468
    Local<Object> obj = t->NewInstance(env->context()).ToLocalChecked();
195
156
    new ConverterObject(env, obj, conv, ignoreBOM);
196
468
    args.GetReturnValue().Set(obj);
197
  }
198
199
556
  static void Decode(const FunctionCallbackInfo<Value>& args) {
200
556
    Environment* env = Environment::GetCurrent(args);
201
202
556
    CHECK_GE(args.Length(), 3);  // Converter, Buffer, Flags
203
204
556
    Converter utf8("utf8");
205
    ConverterObject* converter;
206
1112
    ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
207

4448
    SPREAD_BUFFER_ARG(args[1], input_obj);
208
2224
    int flags = args[2]->Uint32Value(env->context()).ToChecked();
209
210
556
    UErrorCode status = U_ZERO_ERROR;
211
603
    MaybeStackBuffer<UChar> result;
212
    MaybeLocal<Object> ret;
213
556
    size_t limit = ucnv_getMinCharSize(converter->conv) *
214
556
                   input_obj_length;
215
556
    if (limit > 0)
216
530
      result.AllocateSufficientStorage(limit);
217
218
556
    UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
219
556
    OnScopeLeave cleanup([&]() {
220
556
      if (flush) {
221
        // Reset the converter state.
222
141
        converter->bomSeen_ = false;
223
141
        ucnv_reset(converter->conv);
224
      }
225
1159
    });
226
227
556
    const char* source = input_obj_data;
228
556
    size_t source_length = input_obj_length;
229
230

556
    if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) {
231
130
      int32_t bomOffset = 0;
232
130
      ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status);
233
130
      source += bomOffset;
234
130
      source_length -= bomOffset;
235
130
      converter->bomSeen_ = true;
236
    }
237
238
556
    UChar* target = *result;
239
    ucnv_toUnicode(converter->conv,
240
556
                   &target, target + (limit * sizeof(UChar)),
241
                   &source, source + source_length,
242
1112
                   nullptr, flush, &status);
243
244
556
    if (U_SUCCESS(status)) {
245
509
      if (limit > 0)
246
483
        result.SetLength(target - &result[0]);
247
509
      ret = ToBufferEndian(env, &result);
248
1018
      args.GetReturnValue().Set(ret.ToLocalChecked());
249
509
      return;
250
    }
251
252
188
    args.GetReturnValue().Set(status);
253
  }
254
255
  SET_NO_MEMORY_INFO()
256
  SET_MEMORY_INFO_NAME(ConverterObject)
257
  SET_SELF_SIZE(ConverterObject)
258
259
 protected:
260
156
  ConverterObject(Environment* env,
261
                  Local<Object> wrap,
262
                  UConverter* converter,
263
                  bool ignoreBOM,
264
                  const char* sub = nullptr) :
265
                  BaseObject(env, wrap),
266
                  Converter(converter, sub),
267
156
                  ignoreBOM_(ignoreBOM) {
268
156
    MakeWeak();
269
270
156
    switch (ucnv_getType(converter)) {
271
      case UCNV_UTF8:
272
      case UCNV_UTF16_BigEndian:
273
      case UCNV_UTF16_LittleEndian:
274
156
        unicode_ = true;
275
156
        break;
276
      default:
277
        unicode_ = false;
278
    }
279
156
  }
280
281
 private:
282
  bool unicode_ = false;     // True if this is a Unicode converter
283
  bool ignoreBOM_ = false;   // True if the BOM should be ignored on Unicode
284
  bool bomSeen_ = false;     // True if the BOM has been seen
285
};
286
287
// One-Shot Converters
288
289
2
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
290
                      const char* data,
291
                      const size_t length,
292
                      const size_t length_in_chars) {
293
2
  dest->AllocateSufficientStorage(length_in_chars);
294
2
  char* dst = reinterpret_cast<char*>(**dest);
295
2
  memcpy(dst, data, length);
296
2
  if (IsBigEndian()) {
297
    SwapBytes16(dst, length);
298
  }
299
2
}
300
301
// Signature shared by all one-shot transcoding routines below, so that the
// binding can pick one through a function pointer.
using TranscodeFunc = MaybeLocal<Object> (*)(Environment* env,
                                             const char* fromEncoding,
                                             const char* toEncoding,
                                             const char* source,
                                             const size_t source_length,
                                             UErrorCode* status);
307
308
2
// Converts `source` from `fromEncoding` to `toEncoding` with
// ucnv_convertEx(). The target converter uses "?" as its substitution
// sequence. `*status` receives the ICU result; an empty MaybeLocal is
// returned when the conversion fails.
MaybeLocal<Object> Transcode(Environment* env,
                             const char* fromEncoding,
                             const char* toEncoding,
                             const char* source,
                             const size_t source_length,
                             UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<char> output;
  Converter target_conv(toEncoding, "?");
  Converter source_conv(fromEncoding);

  // Room for the largest encoding of every input byte.
  const uint32_t capacity =
      source_length * ucnv_getMaxCharSize(target_conv.conv);
  output.AllocateSufficientStorage(capacity);

  char* cursor = *output;
  char* const output_end = cursor + capacity;
  const char* const source_end = source + source_length;
  ucnv_convertEx(target_conv.conv, source_conv.conv,
                 &cursor, output_end,
                 &source, source_end,
                 nullptr, nullptr, nullptr, nullptr,
                 true, true, status);

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  output.SetLength(cursor - &output[0]);
  return ToBufferEndian(env, &output);
}
331
332
4
// Converts `source` from `fromEncoding` into UTF-16 code units with
// ucnv_toUChars(). `toEncoding` is unused. `*status` receives the ICU result;
// an empty MaybeLocal is returned when the conversion fails.
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
                                   const char* fromEncoding,
                                   const char* toEncoding,
                                   const char* source,
                                   const size_t source_length,
                                   UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  // One UChar of output is reserved per input byte.
  MaybeStackBuffer<UChar> destbuf(source_length);
  Converter from(fromEncoding);
  // The capacity passed to ICU is counted in UChars and must match what was
  // reserved above; otherwise ICU may write (for instance its terminating
  // NUL) past the end of the buffer.
  const int32_t length_in_chars =
      ucnv_toUChars(from.conv, *destbuf, source_length,
                    source, source_length, status);
  if (U_SUCCESS(*status)) {
    destbuf.SetLength(length_in_chars);
    ret = ToBufferEndian(env, &destbuf);
  }
  return ret;
}
349
350
// Converts two-byte source data to `toEncoding` with ucnv_fromUChars(),
// using "?" as the substitution sequence. `fromEncoding` is unused.
// `*status` receives the ICU result; an empty MaybeLocal is returned when
// the conversion fails.
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
                                     const char* fromEncoding,
                                     const char* toEncoding,
                                     const char* source,
                                     const size_t source_length,
                                     UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> utf16_input;
  Converter target_conv(toEncoding, "?");

  const size_t unit_count = source_length / sizeof(UChar);
  CopySourceBuffer(&utf16_input, source, source_length, unit_count);

  MaybeStackBuffer<char> output(unit_count);
  const uint32_t written = ucnv_fromUChars(target_conv.conv,
                                           *output, unit_count,
                                           *utf16_input, unit_count,
                                           status);
  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  output.SetLength(written);
  return ToBufferEndian(env, &output);
}
371
372
2
// Converts UTF-8 `source` to UTF-16 code units with u_strFromUTF8(). The
// first attempt uses the buffer's current capacity; if ICU reports a buffer
// overflow, the buffer is grown to the length ICU asked for and the
// conversion is run once more. Both encoding name arguments are unused.
// `*status` receives the ICU result; an empty MaybeLocal is returned on
// failure.
MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> output;
  int32_t needed;

  u_strFromUTF8(*output, output.capacity(), &needed,
                source, source_length, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // `needed` now holds the required length: grow and retry.
    *status = U_ZERO_ERROR;
    output.AllocateSufficientStorage(needed);
    u_strFromUTF8(*output, needed, &needed,
                  source, source_length, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  output.SetLength(needed);
  return ToBufferEndian(env, &output);
}
399
400
2
// Converts two-byte source data to UTF-8 with u_strToUTF8(). The first
// attempt uses the output buffer's current capacity; if ICU reports a buffer
// overflow, the buffer is grown to the length ICU asked for and the
// conversion is run once more. Both encoding name arguments are unused.
// `*status` receives the ICU result; an empty MaybeLocal is returned on
// failure.
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  const size_t unit_count = source_length / sizeof(UChar);
  int32_t needed;
  MaybeStackBuffer<UChar> utf16_input;
  MaybeStackBuffer<char> output;
  CopySourceBuffer(&utf16_input, source, source_length, unit_count);

  u_strToUTF8(*output, output.capacity(), &needed,
              *utf16_input, unit_count, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // `needed` now holds the required length: grow and retry.
    *status = U_ZERO_ERROR;
    output.AllocateSufficientStorage(needed);
    u_strToUTF8(*output, needed, &needed,
                *utf16_input, unit_count, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  output.SetLength(needed);
  return ToBufferEndian(env, &output);
}
430
431
20
const char* EncodingName(const enum encoding encoding) {
432

20
  switch (encoding) {
433
2
    case ASCII: return "us-ascii";
434
4
    case LATIN1: return "iso8859-1";
435
8
    case UCS2: return "utf16le";
436
6
    case UTF8: return "utf-8";
437
    default: return nullptr;
438
  }
439
}
440
441
22
bool SupportedEncoding(const enum encoding encoding) {
442
22
  switch (encoding) {
443
    case ASCII:
444
    case LATIN1:
445
    case UCS2:
446
20
    case UTF8: return true;
447
2
    default: return false;
448
  }
449
}
450
451
12
void Transcode(const FunctionCallbackInfo<Value>&args) {
452
12
  Environment* env = Environment::GetCurrent(args);
453
12
  Isolate* isolate = env->isolate();
454
12
  UErrorCode status = U_ZERO_ERROR;
455
  MaybeLocal<Object> result;
456
457
12
  CHECK(Buffer::HasInstance(args[0]));
458

96
  SPREAD_BUFFER_ARG(args[0], ts_obj);
459
12
  const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER);
460
12
  const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER);
461
462

12
  if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
463
10
    TranscodeFunc tfn = &Transcode;
464

10
    switch (fromEncoding) {
465
      case ASCII:
466
      case LATIN1:
467
4
        if (toEncoding == UCS2)
468
4
          tfn = &TranscodeToUcs2;
469
4
        break;
470
      case UTF8:
471
4
        if (toEncoding == UCS2)
472
2
          tfn = &TranscodeUcs2FromUtf8;
473
4
        break;
474
      case UCS2:
475
2
        switch (toEncoding) {
476
          case UCS2:
477
            tfn = &Transcode;
478
            break;
479
          case UTF8:
480
2
            tfn = &TranscodeUtf8FromUcs2;
481
2
            break;
482
          default:
483
            tfn = &TranscodeFromUcs2;
484
        }
485
2
        break;
486
      default:
487
        // This should not happen because of the SupportedEncoding checks
488
        ABORT();
489
    }
490
491
    result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding),
492
10
                 ts_obj_data, ts_obj_length, &status);
493
  } else {
494
2
    status = U_ILLEGAL_ARGUMENT_ERROR;
495
  }
496
497
12
  if (result.IsEmpty())
498
6
    return args.GetReturnValue().Set(status);
499
500
20
  return args.GetReturnValue().Set(result.ToLocalChecked());
501
}
502
503
2
void ICUErrorName(const FunctionCallbackInfo<Value>& args) {
504
2
  Environment* env = Environment::GetCurrent(args);
505
4
  CHECK(args[0]->IsInt32());
506
6
  UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value());
507
  args.GetReturnValue().Set(
508
      String::NewFromUtf8(env->isolate(),
509
                          u_errorName(status),
510
6
                          NewStringType::kNormal).ToLocalChecked());
511
2
}
512
513
}  // anonymous namespace
514
515
3564
bool InitializeICUDirectory(const std::string& path) {
516
3564
  UErrorCode status = U_ZERO_ERROR;
517
3564
  if (path.empty()) {
518
#ifdef NODE_HAVE_SMALL_ICU
519
    // install the 'small' data.
520
3562
    udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status);
521
#else  // !NODE_HAVE_SMALL_ICU
522
    // no small data, so nothing to do.
523
#endif  // !NODE_HAVE_SMALL_ICU
524
  } else {
525
2
    u_setDataDirectory(path.c_str());
526
2
    u_init(&status);
527
  }
528
3564
  return status == U_ZERO_ERROR;
529
}
530
531
382
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
532
                  const char* input,
533
                  size_t length) {
534
382
  UErrorCode status = U_ZERO_ERROR;
535
382
  uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
536
382
  UIDNA* uidna = uidna_openUTS46(options, &status);
537
382
  if (U_FAILURE(status))
538
    return -1;
539
382
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
540
541
  int32_t len = uidna_nameToUnicodeUTF8(uidna,
542
                                        input, length,
543
382
                                        **buf, buf->capacity(),
544
                                        &info,
545
382
                                        &status);
546
547
  // Do not check info.errors like we do with ToASCII since ToUnicode always
548
  // returns a string, despite any possible errors that may have occurred.
549
550
382
  if (status == U_BUFFER_OVERFLOW_ERROR) {
551
    status = U_ZERO_ERROR;
552
    buf->AllocateSufficientStorage(len);
553
    len = uidna_nameToUnicodeUTF8(uidna,
554
                                  input, length,
555
                                  **buf, buf->capacity(),
556
                                  &info,
557
                                  &status);
558
  }
559
560
  // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
561
  // string, regardless of whether an error occurred.
562
563
382
  if (U_FAILURE(status)) {
564
    len = -1;
565
    buf->SetLength(0);
566
  } else {
567
382
    buf->SetLength(len);
568
  }
569
570
382
  uidna_close(uidna);
571
382
  return len;
572
}
573
574
6325
int32_t ToASCII(MaybeStackBuffer<char>* buf,
575
                const char* input,
576
                size_t length,
577
                enum idna_mode mode) {
578
6325
  UErrorCode status = U_ZERO_ERROR;
579
  uint32_t options =                  // CheckHyphens = false; handled later
580
    UIDNA_CHECK_BIDI |                // CheckBidi = true
581
    UIDNA_CHECK_CONTEXTJ |            // CheckJoiners = true
582
6325
    UIDNA_NONTRANSITIONAL_TO_ASCII;   // Nontransitional_Processing
583
6325
  if (mode == IDNA_STRICT) {
584
    options |= UIDNA_USE_STD3_RULES;  // UseSTD3ASCIIRules = beStrict
585
                                      // VerifyDnsLength = beStrict;
586
                                      //   handled later
587
  }
588
589
6325
  UIDNA* uidna = uidna_openUTS46(options, &status);
590
6325
  if (U_FAILURE(status))
591
    return -1;
592
6325
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
593
594
  int32_t len = uidna_nameToASCII_UTF8(uidna,
595
                                       input, length,
596
6325
                                       **buf, buf->capacity(),
597
                                       &info,
598
6325
                                       &status);
599
600
6325
  if (status == U_BUFFER_OVERFLOW_ERROR) {
601
2
    status = U_ZERO_ERROR;
602
2
    buf->AllocateSufficientStorage(len);
603
    len = uidna_nameToASCII_UTF8(uidna,
604
                                 input, length,
605
2
                                 **buf, buf->capacity(),
606
                                 &info,
607
2
                                 &status);
608
  }
609
610
  // In UTS #46 which specifies ToASCII, certain error conditions are
611
  // configurable through options, and the WHATWG URL Standard promptly elects
612
  // to disable some of them to accommodate for real-world use cases.
613
  // Unfortunately, ICU4C's IDNA module does not support disabling some of
614
  // these options through `options` above, and thus continues throwing
615
  // unnecessary errors. To counter this situation, we just filter out the
616
  // errors that may have happened afterwards, before deciding whether to
617
  // return an error from this function.
618
619
  // CheckHyphens = false
620
  // (Specified in the current UTS #46 draft rev. 18.)
621
  // Refs:
622
  // - https://github.com/whatwg/url/issues/53
623
  // - https://github.com/whatwg/url/pull/309
624
  // - http://www.unicode.org/review/pri317/
625
  // - http://www.unicode.org/reports/tr46/tr46-18.html
626
  // - https://www.icann.org/news/announcement-2000-01-07-en
627
6325
  info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
628
6325
  info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
629
6325
  info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
630
631
6325
  if (mode != IDNA_STRICT) {
632
    // VerifyDnsLength = beStrict
633
6325
    info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
634
6325
    info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
635
6325
    info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
636
  }
637
638


6325
  if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
639
78
    len = -1;
640
78
    buf->SetLength(0);
641
  } else {
642
6247
    buf->SetLength(len);
643
  }
644
645
6325
  uidna_close(uidna);
646
6325
  return len;
647
}
648
649
189
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
650
189
  Environment* env = Environment::GetCurrent(args);
651
189
  CHECK_GE(args.Length(), 1);
652
567
  CHECK(args[0]->IsString());
653
189
  Utf8Value val(env->isolate(), args[0]);
654
655
378
  MaybeStackBuffer<char> buf;
656
189
  int32_t len = ToUnicode(&buf, *val, val.length());
657
658
189
  if (len < 0) {
659
189
    return env->ThrowError("Cannot convert name to Unicode");
660
  }
661
662
  args.GetReturnValue().Set(
663
      String::NewFromUtf8(env->isolate(),
664
189
                          *buf,
665
                          NewStringType::kNormal,
666
756
                          len).ToLocalChecked());
667
}
668
669
4336
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
670
4336
  Environment* env = Environment::GetCurrent(args);
671
4336
  CHECK_GE(args.Length(), 1);
672
13008
  CHECK(args[0]->IsString());
673
4336
  Utf8Value val(env->isolate(), args[0]);
674
  // optional arg
675
13008
  bool lenient = args[1]->BooleanValue(env->isolate());
676
4336
  enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
677
678
8663
  MaybeStackBuffer<char> buf;
679
4336
  int32_t len = ToASCII(&buf, *val, val.length(), mode);
680
681
4336
  if (len < 0) {
682
4345
    return env->ThrowError("Cannot convert name to ASCII");
683
  }
684
685
  args.GetReturnValue().Set(
686
      String::NewFromUtf8(env->isolate(),
687
4327
                          *buf,
688
                          NewStringType::kNormal,
689
17308
                          len).ToLocalChecked());
690
}
691
692
// This is similar to wcwidth except that it takes the current unicode
693
// character properties database into consideration, allowing it to
694
// correctly calculate the column widths of things like emoji and
695
// newer wide characters. wcwidth, on the other hand, uses a fixed
696
// algorithm that does not take things like emoji into proper
697
// consideration.
698
//
699
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by
700
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs
701
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't
702
// allow it to be input. Linux's PTY terminal prints control characters as
703
// Narrow rhombi.
704
//
705
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final
706
// consonants are 0-width when combined with initial consonants; otherwise they
707
// are technically Wide. But many terminals (including Konsole and
708
// VTE/GLib-based) implement all medials and finals as 0-width.
709
//
710
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width
711
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420
712
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223
713
125840
static int GetColumnWidth(UChar32 codepoint,
714
                          bool ambiguous_as_full_width = false) {
715
  const auto zero_width_mask = U_GC_CC_MASK |  // C0/C1 control code
716
                               U_GC_CF_MASK |  // Format control character
717
                               U_GC_ME_MASK |  // Enclosing mark
718
125840
                               U_GC_MN_MASK;   // Nonspacing mark
719

251735
  if (codepoint != 0x00AD &&  // SOFT HYPHEN is Cf but not zero-width
720
251623
      ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
721
125784
       u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
722
56
    return 0;
723
  }
724
725
  // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
726
  // codepoint as being full width, wide, ambiguous, neutral, narrow,
727
  // or halfwidth.
728
125784
  const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);
729

125784
  switch (eaw) {
730
    case U_EA_FULLWIDTH:
731
    case U_EA_WIDE:
732
50
      return 2;
733
    case U_EA_AMBIGUOUS:
734
      // See: http://www.unicode.org/reports/tr11/#Ambiguous for details
735
4
      if (ambiguous_as_full_width) {
736
1
        return 2;
737
      }
738
      // If ambiguous_as_full_width is false:
739
      // Fall through
740
    case U_EA_NEUTRAL:
741
26
      if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
742
        return 2;
743
      }
744
      // Fall through
745
    case U_EA_HALFWIDTH:
746
    case U_EA_NARROW:
747
    default:
748
125733
      return 1;
749
  }
750
}
751
752
// Returns the column width for the given String.
753
125467
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
754
125467
  Environment* env = Environment::GetCurrent(args);
755
125467
  if (args.Length() < 1)
756
125376
    return;
757
758
250934
  bool ambiguous_as_full_width = args[1]->IsTrue();
759
250934
  bool expand_emoji_sequence = args[2]->IsTrue();
760
761
250934
  if (args[0]->IsNumber()) {
762
    uint32_t val;
763
501504
    if (!args[0]->Uint32Value(env->context()).To(&val)) return;
764
376128
    args.GetReturnValue().Set(GetColumnWidth(val, ambiguous_as_full_width));
765
125376
    return;
766
  }
767
768
91
  TwoByteValue value(env->isolate(), args[0]);
769
  // reinterpret_cast is required by windows to compile
770
91
  UChar* str = reinterpret_cast<UChar*>(*value);
771
  static_assert(sizeof(*str) == sizeof(**value),
772
                "sizeof(*str) == sizeof(**value)");
773
91
  UChar32 c = 0;
774
  UChar32 p;
775
91
  size_t n = 0;
776
91
  uint32_t width = 0;
777
778
649
  while (n < value.length()) {
779
467
    p = c;
780


467
    U16_NEXT(str, n, value.length(), c);
781
    // Don't count individual emoji codepoints that occur within an
782
    // emoji sequence. This is not necessarily foolproof. Some
783
    // environments display emoji sequences in the appropriate
784
    // condensed form (as a single emoji glyph), other environments
785
    // may not understand an emoji sequence and will display each
786
    // individual emoji separately. When this happens, the width
787
    // calculated will be off, and there's no reliable way of knowing
788
    // in advance if a particular sequence is going to be supported.
789
    // The expand_emoji_sequence option allows the caller to skip this
790
    // check and count each code within an emoji sequence separately.
791

1394
    if (!expand_emoji_sequence &&
792

933
        n > 0 && p == 0x200d &&  // 0x200d == ZWJ (zero width joiner)
793
3
        (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||
794
         u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) {
795
3
      continue;
796
    }
797
464
    width += GetColumnWidth(c, ambiguous_as_full_width);
798
  }
799
182
  args.GetReturnValue().Set(width);
800
}
801
802
3661
void Initialize(Local<Object> target,
803
                Local<Value> unused,
804
                Local<Context> context,
805
                void* priv) {
806
3661
  Environment* env = Environment::GetCurrent(context);
807
3661
  env->SetMethod(target, "toUnicode", ToUnicode);
808
3661
  env->SetMethod(target, "toASCII", ToASCII);
809
3661
  env->SetMethod(target, "getStringWidth", GetStringWidth);
810
811
  // One-shot converters
812
3661
  env->SetMethod(target, "icuErrName", ICUErrorName);
813
3661
  env->SetMethod(target, "transcode", Transcode);
814
815
  // ConverterObject
816
3661
  env->SetMethod(target, "getConverter", ConverterObject::Create);
817
3661
  env->SetMethod(target, "decode", ConverterObject::Decode);
818
3661
  env->SetMethod(target, "hasConverter", ConverterObject::Has);
819
3661
}
820
821
}  // namespace i18n
822
}  // namespace node
823
824
3596
NODE_MODULE_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize)
825
826
#endif  // NODE_HAVE_I18N_SUPPORT