GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/node_i18n.cc Lines: 322 360 89.4 %
Date: 2019-02-13 22:28:58 Branches: 149 224 66.5 %

Line Branch Exec Source
1
// Copyright Joyent, Inc. and other Node contributors.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a
4
// copy of this software and associated documentation files (the
5
// "Software"), to deal in the Software without restriction, including
6
// without limitation the rights to use, copy, modify, merge, publish,
7
// distribute, sublicense, and/or sell copies of the Software, and to permit
8
// persons to whom the Software is furnished to do so, subject to the
9
// following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included
12
// in all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22
/*
23
 * notes: by srl295
24
 *  - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data
25
 *     ( stubdata/libicudata.a ) containing nothing, no data, and it's also
26
 *    linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT
27
 *    macro names. That's the "english+root" data.
28
 *
29
 *    If icu_data_path is non-null, the user has provided a path and we assume
30
 *    it goes somewhere useful. We set that path in ICU, and exit.
31
 *    If icu_data_path is null, they haven't set a path and we want the
32
 *    "english+root" data.  We call
33
 *       udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...)
34
 *    to load up the english+root data.
35
 *
36
 *  - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full
37
 *    data. All of the variables and command line options for changing data at
38
 *    runtime are disabled, as they wouldn't fully override the internal data.
39
 *    See:  http://bugs.icu-project.org/trac/ticket/10924
40
 */
41
42
43
#include "node_i18n.h"
44
45
#if defined(NODE_HAVE_I18N_SUPPORT)
46
47
#include "base_object-inl.h"
48
#include "env-inl.h"
49
#include "node.h"
50
#include "node_buffer.h"
51
#include "node_errors.h"
52
#include "node_internals.h"
53
#include "util-inl.h"
54
#include "v8.h"
55
56
#include <unicode/utypes.h>
57
#include <unicode/putil.h>
58
#include <unicode/uchar.h>
59
#include <unicode/uclean.h>
60
#include <unicode/udata.h>
61
#include <unicode/uidna.h>
62
#include <unicode/ucnv.h>
63
#include <unicode/utf8.h>
64
#include <unicode/utf16.h>
65
#include <unicode/timezone.h>
66
#include <unicode/ulocdata.h>
67
#include <unicode/uvernum.h>
68
#include <unicode/uversion.h>
69
#include <unicode/ustring.h>
70
71
#ifdef NODE_HAVE_SMALL_ICU
72
/* if this is defined, we have a 'secondary' entry point.
73
   compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
74
#define SMALL_ICUDATA_ENTRY_POINT \
75
  SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME)
76
#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff)
77
#ifndef U_LIB_SUFFIX_C_NAME
78
#define SMALL_DEF(major, suff) icusmdt##major##_dat
79
#else
80
#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat
81
#endif
82
83
extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
84
#endif
85
86
namespace node {
87
88
using v8::Context;
89
using v8::FunctionCallbackInfo;
90
using v8::HandleScope;
91
using v8::Int32;
92
using v8::Isolate;
93
using v8::Local;
94
using v8::MaybeLocal;
95
using v8::NewStringType;
96
using v8::Object;
97
using v8::ObjectTemplate;
98
using v8::String;
99
using v8::Value;
100
101
namespace i18n {
102
namespace {
103
104
template <typename T>
105
534
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) {
106
534
  MaybeLocal<Object> ret = Buffer::New(env, buf);
107

534
  if (ret.IsEmpty())
108
    return ret;
109
110
  static_assert(sizeof(T) == 1 || sizeof(T) == 2,
111
                "Currently only one- or two-byte buffers are supported");
112

534
  if (sizeof(T) > 1 && IsBigEndian()) {
113
    SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
114
    SwapBytes16(retbuf_data, retbuf_length);
115
  }
116
117
534
  return ret;
118
}
119
120
struct Converter {
121
624
  explicit Converter(const char* name, const char* sub = nullptr)
122
624
      : conv(nullptr) {
123
624
    UErrorCode status = U_ZERO_ERROR;
124
624
    conv = ucnv_open(name, &status);
125
624
    CHECK(U_SUCCESS(status));
126
624
    if (sub != nullptr) {
127
2
      ucnv_setSubstChars(conv, sub, strlen(sub), &status);
128
    }
129
624
  }
130
131
205
  explicit Converter(UConverter* converter,
132
205
                     const char* sub = nullptr) : conv(converter) {
133
205
    CHECK_NOT_NULL(conv);
134
205
    UErrorCode status = U_ZERO_ERROR;
135
205
    if (sub != nullptr) {
136
      ucnv_setSubstChars(conv, sub, strlen(sub), &status);
137
    }
138
205
  }
139
140
829
  ~Converter() {
141
829
    ucnv_close(conv);
142
829
  }
143
144
  UConverter* conv;
145
};
146
147
class ConverterObject : public BaseObject, Converter {
148
 public:
149
  enum ConverterFlags {
150
    CONVERTER_FLAGS_FLUSH      = 0x1,
151
    CONVERTER_FLAGS_FATAL      = 0x2,
152
    CONVERTER_FLAGS_IGNORE_BOM = 0x4
153
  };
154
155
410
  ~ConverterObject() override {}
156
157
2
  static void Has(const FunctionCallbackInfo<Value>& args) {
158
2
    Environment* env = Environment::GetCurrent(args);
159
2
    HandleScope scope(env->isolate());
160
161
2
    CHECK_GE(args.Length(), 1);
162
4
    Utf8Value label(env->isolate(), args[0]);
163
164
2
    UErrorCode status = U_ZERO_ERROR;
165
2
    UConverter* conv = ucnv_open(*label, &status);
166
6
    args.GetReturnValue().Set(!!U_SUCCESS(status));
167
4
    ucnv_close(conv);
168
2
  }
169
170
205
  static void Create(const FunctionCallbackInfo<Value>& args) {
171
205
    Environment* env = Environment::GetCurrent(args);
172
205
    HandleScope scope(env->isolate());
173
174
205
    Local<ObjectTemplate> t = ObjectTemplate::New(env->isolate());
175
205
    t->SetInternalFieldCount(1);
176
    Local<Object> obj;
177
615
    if (!t->NewInstance(env->context()).ToLocal(&obj)) return;
178
179
205
    CHECK_GE(args.Length(), 2);
180
410
    Utf8Value label(env->isolate(), args[0]);
181
820
    int flags = args[1]->Uint32Value(env->context()).ToChecked();
182
    bool fatal =
183
205
        (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
184
    bool ignoreBOM =
185
205
        (flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM;
186
187
205
    UErrorCode status = U_ZERO_ERROR;
188
205
    UConverter* conv = ucnv_open(*label, &status);
189
205
    if (U_FAILURE(status))
190
      return;
191
192
205
    if (fatal) {
193
94
      status = U_ZERO_ERROR;
194
      ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
195
94
                          nullptr, nullptr, nullptr, &status);
196
    }
197
198
205
    new ConverterObject(env, obj, conv, ignoreBOM);
199
615
    args.GetReturnValue().Set(obj);
200
  }
201
202
616
  static void Decode(const FunctionCallbackInfo<Value>& args) {
203
616
    Environment* env = Environment::GetCurrent(args);
204
205
616
    CHECK_GE(args.Length(), 3);  // Converter, Buffer, Flags
206
207
616
    Converter utf8("utf8");
208
    ConverterObject* converter;
209
1232
    ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
210

4928
    SPREAD_BUFFER_ARG(args[1], input_obj);
211
2464
    int flags = args[2]->Uint32Value(env->context()).ToChecked();
212
213
616
    UErrorCode status = U_ZERO_ERROR;
214
708
    MaybeStackBuffer<UChar> result;
215
    MaybeLocal<Object> ret;
216
616
    size_t limit = ucnv_getMinCharSize(converter->conv) *
217
616
                   input_obj_length;
218
616
    if (limit > 0)
219
590
      result.AllocateSufficientStorage(limit);
220
221
616
    UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
222
616
    OnScopeLeave cleanup([&]() {
223
616
      if (flush) {
224
        // Reset the converter state.
225
198
        converter->bomSeen_ = false;
226
198
        ucnv_reset(converter->conv);
227
      }
228
1324
    });
229
230
616
    const char* source = input_obj_data;
231
616
    size_t source_length = input_obj_length;
232
233

616
    if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) {
234
187
      int32_t bomOffset = 0;
235
187
      ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status);
236
187
      source += bomOffset;
237
187
      source_length -= bomOffset;
238
187
      converter->bomSeen_ = true;
239
    }
240
241
616
    UChar* target = *result;
242
    ucnv_toUnicode(converter->conv,
243
616
                   &target, target + (limit * sizeof(UChar)),
244
                   &source, source + source_length,
245
1232
                   nullptr, flush, &status);
246
247
616
    if (U_SUCCESS(status)) {
248
524
      if (limit > 0)
249
498
        result.SetLength(target - &result[0]);
250
524
      ret = ToBufferEndian(env, &result);
251
1048
      args.GetReturnValue().Set(ret.ToLocalChecked());
252
524
      return;
253
    }
254
255
368
    args.GetReturnValue().Set(status);
256
  }
257
258
  SET_NO_MEMORY_INFO()
259
  SET_MEMORY_INFO_NAME(ConverterObject)
260
  SET_SELF_SIZE(ConverterObject)
261
262
 protected:
263
205
  ConverterObject(Environment* env,
264
                  Local<Object> wrap,
265
                  UConverter* converter,
266
                  bool ignoreBOM,
267
                  const char* sub = nullptr) :
268
                  BaseObject(env, wrap),
269
                  Converter(converter, sub),
270
205
                  ignoreBOM_(ignoreBOM) {
271
205
    MakeWeak();
272
273
205
    switch (ucnv_getType(converter)) {
274
      case UCNV_UTF8:
275
      case UCNV_UTF16_BigEndian:
276
      case UCNV_UTF16_LittleEndian:
277
205
        unicode_ = true;
278
205
        break;
279
      default:
280
        unicode_ = false;
281
    }
282
205
  }
283
284
 private:
285
  bool unicode_ = false;     // True if this is a Unicode converter
286
  bool ignoreBOM_ = false;   // True if the BOM should be ignored on Unicode
287
  bool bomSeen_ = false;     // True if the BOM has been seen
288
};
289
290
// One-Shot Converters
291
292
2
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
293
                      const char* data,
294
                      const size_t length,
295
                      const size_t length_in_chars) {
296
2
  dest->AllocateSufficientStorage(length_in_chars);
297
2
  char* dst = reinterpret_cast<char*>(**dest);
298
2
  memcpy(dst, data, length);
299
2
  if (IsBigEndian()) {
300
    SwapBytes16(dst, length);
301
  }
302
2
}
303
304
// Common signature of the one-shot transcoding helpers below, so that the
// binding can pick one of them at run time and call it through a pointer.
using TranscodeFunc = MaybeLocal<Object> (*)(Environment* env,
                                             const char* fromEncoding,
                                             const char* toEncoding,
                                             const char* source,
                                             const size_t source_length,
                                             UErrorCode* status);
310
311
2
// Generic one-shot transcoder: converts `source_length` bytes at `source`
// from `fromEncoding` to `toEncoding` with ucnv_convertEx(), using "?" as the
// requested substitution sequence for the target converter.
//
// `*status` is reset on entry and carries the ICU result on return. The
// returned MaybeLocal is empty when the conversion failed.
MaybeLocal<Object> Transcode(Environment* env,
                             const char* fromEncoding,
                             const char* toEncoding,
                             const char* source,
                             const size_t source_length,
                             UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  MaybeStackBuffer<char> result;
  Converter to(toEncoding, "?");
  Converter from(fromEncoding);
  // Kept as size_t: a 32-bit value would silently truncate for large inputs
  // and make the output buffer too small.
  const size_t limit = source_length * ucnv_getMaxCharSize(to.conv);
  result.AllocateSufficientStorage(limit);
  char* target = *result;
  ucnv_convertEx(to.conv, from.conv, &target, target + limit,
                 &source, source + source_length, nullptr, nullptr,
                 nullptr, nullptr, true, true, status);
  if (U_SUCCESS(*status)) {
    result.SetLength(target - &result[0]);
    ret = ToBufferEndian(env, &result);
  }
  return ret;
}
334
335
4
// One-shot transcoder from `fromEncoding` to UTF-16 code units, using
// ucnv_toUChars(). `toEncoding` is not consulted.
//
// `*status` is reset on entry and carries the ICU result on return. The
// returned MaybeLocal is empty when the conversion failed.
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
                                   const char* fromEncoding,
                                   const char* toEncoding,
                                   const char* source,
                                   const size_t source_length,
                                   UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  MaybeStackBuffer<UChar> destbuf(source_length);
  Converter from(fromEncoding);
  // The destination holds `source_length` UChars, so that is the capacity
  // ICU must be given. Advertising a larger (byte-based) capacity allows ICU
  // to write a terminator or extra output beyond the allocation.
  const int32_t written = ucnv_toUChars(
      from.conv, *destbuf, static_cast<int32_t>(source_length),
      source, static_cast<int32_t>(source_length), status);
  if (U_SUCCESS(*status)) {
    destbuf.SetLength(written);
    ret = ToBufferEndian(env, &destbuf);
  }
  return ret;
}
352
353
// One-shot transcoder from UTF-16 code units to `toEncoding`, using
// ucnv_fromUChars() with "?" as the requested substitution sequence.
// `fromEncoding` is not consulted.
//
// `*status` is reset on entry and carries the ICU result on return. The
// returned MaybeLocal is empty when the conversion failed.
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
                                     const char* fromEncoding,
                                     const char* toEncoding,
                                     const char* source,
                                     const size_t source_length,
                                     UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> utf16_input;
  Converter to(toEncoding, "?");

  const size_t length_in_chars = source_length / sizeof(UChar);
  CopySourceBuffer(&utf16_input, source, source_length, length_in_chars);

  MaybeStackBuffer<char> destbuf(length_in_chars);
  const uint32_t len = ucnv_fromUChars(to.conv, *destbuf, length_in_chars,
                                       *utf16_input, length_in_chars, status);
  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(len);
  return ToBufferEndian(env, &destbuf);
}
374
375
2
// One-shot UTF-8 to UTF-16 transcoder built on u_strFromUTF8(). The encoding
// name arguments are not consulted.
//
// The conversion is first attempted into the buffer's initial capacity; if
// ICU reports U_BUFFER_OVERFLOW_ERROR the buffer is grown to the length ICU
// asked for and the conversion is repeated once.
//
// `*status` is reset on entry and carries the ICU result on return. The
// returned MaybeLocal is empty when the conversion failed.
MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> destbuf;
  int32_t result_length;
  u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length,
                source, source_length, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // Second attempt with exactly the storage ICU requested.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(result_length);
    u_strFromUTF8(*destbuf, result_length, &result_length,
                  source, source_length, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(result_length);
  return ToBufferEndian(env, &destbuf);
}
402
403
2
// One-shot UTF-16 to UTF-8 transcoder built on u_strToUTF8(). The encoding
// name arguments are not consulted.
//
// The source bytes are first copied into a UChar buffer. The conversion is
// then attempted into the destination's initial capacity; if ICU reports
// U_BUFFER_OVERFLOW_ERROR the destination is grown to the length ICU asked
// for and the conversion is repeated once.
//
// `*status` is reset on entry and carries the ICU result on return. The
// returned MaybeLocal is empty when the conversion failed.
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  const size_t length_in_chars = source_length / sizeof(UChar);
  int32_t result_length;
  MaybeStackBuffer<UChar> utf16_input;
  MaybeStackBuffer<char> destbuf;
  CopySourceBuffer(&utf16_input, source, source_length, length_in_chars);

  u_strToUTF8(*destbuf, destbuf.capacity(), &result_length,
              *utf16_input, length_in_chars, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // Second attempt with exactly the storage ICU requested.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(result_length);
    u_strToUTF8(*destbuf, result_length, &result_length, *utf16_input,
                length_in_chars, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(result_length);
  return ToBufferEndian(env, &destbuf);
}
433
434
20
const char* EncodingName(const enum encoding encoding) {
435

20
  switch (encoding) {
436
2
    case ASCII: return "us-ascii";
437
4
    case LATIN1: return "iso8859-1";
438
8
    case UCS2: return "utf16le";
439
6
    case UTF8: return "utf-8";
440
    default: return nullptr;
441
  }
442
}
443
444
22
bool SupportedEncoding(const enum encoding encoding) {
445
22
  switch (encoding) {
446
    case ASCII:
447
    case LATIN1:
448
    case UCS2:
449
20
    case UTF8: return true;
450
2
    default: return false;
451
  }
452
}
453
454
12
void Transcode(const FunctionCallbackInfo<Value>&args) {
455
12
  Environment* env = Environment::GetCurrent(args);
456
12
  Isolate* isolate = env->isolate();
457
12
  UErrorCode status = U_ZERO_ERROR;
458
  MaybeLocal<Object> result;
459
460
12
  CHECK(Buffer::HasInstance(args[0]));
461

96
  SPREAD_BUFFER_ARG(args[0], ts_obj);
462
12
  const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER);
463
12
  const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER);
464
465

12
  if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
466
10
    TranscodeFunc tfn = &Transcode;
467

10
    switch (fromEncoding) {
468
      case ASCII:
469
      case LATIN1:
470
4
        if (toEncoding == UCS2)
471
4
          tfn = &TranscodeToUcs2;
472
4
        break;
473
      case UTF8:
474
4
        if (toEncoding == UCS2)
475
2
          tfn = &TranscodeUcs2FromUtf8;
476
4
        break;
477
      case UCS2:
478
2
        switch (toEncoding) {
479
          case UCS2:
480
            tfn = &Transcode;
481
            break;
482
          case UTF8:
483
2
            tfn = &TranscodeUtf8FromUcs2;
484
2
            break;
485
          default:
486
            tfn = &TranscodeFromUcs2;
487
        }
488
2
        break;
489
      default:
490
        // This should not happen because of the SupportedEncoding checks
491
        ABORT();
492
    }
493
494
    result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding),
495
10
                 ts_obj_data, ts_obj_length, &status);
496
  } else {
497
2
    status = U_ILLEGAL_ARGUMENT_ERROR;
498
  }
499
500
12
  if (result.IsEmpty())
501
6
    return args.GetReturnValue().Set(status);
502
503
20
  return args.GetReturnValue().Set(result.ToLocalChecked());
504
}
505
506
2
void ICUErrorName(const FunctionCallbackInfo<Value>& args) {
507
2
  Environment* env = Environment::GetCurrent(args);
508
4
  CHECK(args[0]->IsInt32());
509
6
  UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value());
510
  args.GetReturnValue().Set(
511
      String::NewFromUtf8(env->isolate(),
512
                          u_errorName(status),
513
6
                          NewStringType::kNormal).ToLocalChecked());
514
2
}
515
516
}  // anonymous namespace
517
518
4271
bool InitializeICUDirectory(const std::string& path) {
519
4271
  UErrorCode status = U_ZERO_ERROR;
520
4271
  if (path.empty()) {
521
#ifdef NODE_HAVE_SMALL_ICU
522
    // install the 'small' data.
523
4269
    udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status);
524
#else  // !NODE_HAVE_SMALL_ICU
525
    // no small data, so nothing to do.
526
#endif  // !NODE_HAVE_SMALL_ICU
527
  } else {
528
2
    u_setDataDirectory(path.c_str());
529
2
    u_init(&status);
530
  }
531
4271
  return status == U_ZERO_ERROR;
532
}
533
534
382
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
535
                  const char* input,
536
                  size_t length) {
537
382
  UErrorCode status = U_ZERO_ERROR;
538
382
  uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
539
382
  UIDNA* uidna = uidna_openUTS46(options, &status);
540
382
  if (U_FAILURE(status))
541
    return -1;
542
382
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
543
544
  int32_t len = uidna_nameToUnicodeUTF8(uidna,
545
                                        input, length,
546
382
                                        **buf, buf->capacity(),
547
                                        &info,
548
382
                                        &status);
549
550
  // Do not check info.errors like we do with ToASCII since ToUnicode always
551
  // returns a string, despite any possible errors that may have occurred.
552
553
382
  if (status == U_BUFFER_OVERFLOW_ERROR) {
554
    status = U_ZERO_ERROR;
555
    buf->AllocateSufficientStorage(len);
556
    len = uidna_nameToUnicodeUTF8(uidna,
557
                                  input, length,
558
                                  **buf, buf->capacity(),
559
                                  &info,
560
                                  &status);
561
  }
562
563
  // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
564
  // string, regardless of whether an error occurred.
565
566
382
  if (U_FAILURE(status)) {
567
    len = -1;
568
    buf->SetLength(0);
569
  } else {
570
382
    buf->SetLength(len);
571
  }
572
573
382
  uidna_close(uidna);
574
382
  return len;
575
}
576
577
12399
int32_t ToASCII(MaybeStackBuffer<char>* buf,
578
                const char* input,
579
                size_t length,
580
                enum idna_mode mode) {
581
12399
  UErrorCode status = U_ZERO_ERROR;
582
  uint32_t options =                  // CheckHyphens = false; handled later
583
    UIDNA_CHECK_BIDI |                // CheckBidi = true
584
    UIDNA_CHECK_CONTEXTJ |            // CheckJoiners = true
585
12399
    UIDNA_NONTRANSITIONAL_TO_ASCII;   // Nontransitional_Processing
586
12399
  if (mode == IDNA_STRICT) {
587
    options |= UIDNA_USE_STD3_RULES;  // UseSTD3ASCIIRules = beStrict
588
                                      // VerifyDnsLength = beStrict;
589
                                      //   handled later
590
  }
591
592
12399
  UIDNA* uidna = uidna_openUTS46(options, &status);
593
12399
  if (U_FAILURE(status))
594
    return -1;
595
12399
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
596
597
  int32_t len = uidna_nameToASCII_UTF8(uidna,
598
                                       input, length,
599
12399
                                       **buf, buf->capacity(),
600
                                       &info,
601
12399
                                       &status);
602
603
12399
  if (status == U_BUFFER_OVERFLOW_ERROR) {
604
2
    status = U_ZERO_ERROR;
605
2
    buf->AllocateSufficientStorage(len);
606
    len = uidna_nameToASCII_UTF8(uidna,
607
                                 input, length,
608
2
                                 **buf, buf->capacity(),
609
                                 &info,
610
2
                                 &status);
611
  }
612
613
  // In UTS #46 which specifies ToASCII, certain error conditions are
614
  // configurable through options, and the WHATWG URL Standard promptly elects
615
  // to disable some of them to accommodate for real-world use cases.
616
  // Unfortunately, ICU4C's IDNA module does not support disabling some of
617
  // these options through `options` above, and thus continues throwing
618
  // unnecessary errors. To counter this situation, we just filter out the
619
  // errors that may have happened afterwards, before deciding whether to
620
  // return an error from this function.
621
622
  // CheckHyphens = false
623
  // (Specified in the current UTS #46 draft rev. 18.)
624
  // Refs:
625
  // - https://github.com/whatwg/url/issues/53
626
  // - https://github.com/whatwg/url/pull/309
627
  // - http://www.unicode.org/review/pri317/
628
  // - http://www.unicode.org/reports/tr46/tr46-18.html
629
  // - https://www.icann.org/news/announcement-2000-01-07-en
630
12399
  info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
631
12399
  info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
632
12399
  info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
633
634
12399
  if (mode != IDNA_STRICT) {
635
    // VerifyDnsLength = beStrict
636
12399
    info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
637
12399
    info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
638
12399
    info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
639
  }
640
641


12399
  if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
642
78
    len = -1;
643
78
    buf->SetLength(0);
644
  } else {
645
12321
    buf->SetLength(len);
646
  }
647
648
12399
  uidna_close(uidna);
649
12399
  return len;
650
}
651
652
189
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
653
189
  Environment* env = Environment::GetCurrent(args);
654
189
  CHECK_GE(args.Length(), 1);
655
567
  CHECK(args[0]->IsString());
656
189
  Utf8Value val(env->isolate(), args[0]);
657
658
378
  MaybeStackBuffer<char> buf;
659
189
  int32_t len = ToUnicode(&buf, *val, val.length());
660
661
189
  if (len < 0) {
662
189
    return env->ThrowError("Cannot convert name to Unicode");
663
  }
664
665
  args.GetReturnValue().Set(
666
      String::NewFromUtf8(env->isolate(),
667
189
                          *buf,
668
                          NewStringType::kNormal,
669
756
                          len).ToLocalChecked());
670
}
671
672
9792
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
673
9792
  Environment* env = Environment::GetCurrent(args);
674
9792
  CHECK_GE(args.Length(), 1);
675
29376
  CHECK(args[0]->IsString());
676
9792
  Utf8Value val(env->isolate(), args[0]);
677
  // optional arg
678
29376
  bool lenient = args[1]->BooleanValue(env->isolate());
679
9792
  enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
680
681
19575
  MaybeStackBuffer<char> buf;
682
9792
  int32_t len = ToASCII(&buf, *val, val.length(), mode);
683
684
9792
  if (len < 0) {
685
9801
    return env->ThrowError("Cannot convert name to ASCII");
686
  }
687
688
  args.GetReturnValue().Set(
689
      String::NewFromUtf8(env->isolate(),
690
9783
                          *buf,
691
                          NewStringType::kNormal,
692
39132
                          len).ToLocalChecked());
693
}
694
695
// This is similar to wcwidth except that it takes the current unicode
696
// character properties database into consideration, allowing it to
697
// correctly calculate the column widths of things like emojis and
698
// newer wide characters. wcwidth, on the other hand, uses a fixed
699
// algorithm that does not take things like emoji into proper
700
// consideration.
701
//
702
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by
703
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs
704
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't
705
// allow it to be input. Linux's PTY terminal prints control characters as
706
// Narrow rhombi.
707
//
708
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final
709
// consonants are 0-width when combined with initial consonants; otherwise they
710
// are technically Wide. But many terminals (including Konsole and
711
// VTE/GLib-based) implement all medials and finals as 0-width.
712
//
713
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width
714
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420
715
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223
716
126156
static int GetColumnWidth(UChar32 codepoint,
717
                          bool ambiguous_as_full_width = false) {
718
  const auto zero_width_mask = U_GC_CC_MASK |  // C0/C1 control code
719
                               U_GC_CF_MASK |  // Format control character
720
                               U_GC_ME_MASK |  // Enclosing mark
721
126156
                               U_GC_MN_MASK;   // Nonspacing mark
722

252367
  if (codepoint != 0x00AD &&  // SOFT HYPHEN is Cf but not zero-width
723
252255
      ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
724
126100
       u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
725
56
    return 0;
726
  }
727
728
  // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
729
  // codepoint as being full width, wide, ambiguous, neutral, narrow,
730
  // or halfwidth.
731
126100
  const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);
732

126100
  switch (eaw) {
733
    case U_EA_FULLWIDTH:
734
    case U_EA_WIDE:
735
50
      return 2;
736
    case U_EA_AMBIGUOUS:
737
      // See: http://www.unicode.org/reports/tr11/#Ambiguous for details
738
4
      if (ambiguous_as_full_width) {
739
1
        return 2;
740
      }
741
      // If ambiguous_as_full_width is false:
742
      // Fall through
743
    case U_EA_NEUTRAL:
744
26
      if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
745
        return 2;
746
      }
747
      // Fall through
748
    case U_EA_HALFWIDTH:
749
    case U_EA_NARROW:
750
    default:
751
126049
      return 1;
752
  }
753
}
754
755
// Returns the column width for the given String.
756
125783
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
757
125783
  Environment* env = Environment::GetCurrent(args);
758
125783
  if (args.Length() < 1)
759
125692
    return;
760
761
251566
  bool ambiguous_as_full_width = args[1]->IsTrue();
762
251566
  bool expand_emoji_sequence = args[2]->IsTrue();
763
764
251566
  if (args[0]->IsNumber()) {
765
    uint32_t val;
766
502768
    if (!args[0]->Uint32Value(env->context()).To(&val)) return;
767
377076
    args.GetReturnValue().Set(GetColumnWidth(val, ambiguous_as_full_width));
768
125692
    return;
769
  }
770
771
91
  TwoByteValue value(env->isolate(), args[0]);
772
  // reinterpret_cast is required by windows to compile
773
91
  UChar* str = reinterpret_cast<UChar*>(*value);
774
  static_assert(sizeof(*str) == sizeof(**value),
775
                "sizeof(*str) == sizeof(**value)");
776
91
  UChar32 c = 0;
777
  UChar32 p;
778
91
  size_t n = 0;
779
91
  uint32_t width = 0;
780
781
649
  while (n < value.length()) {
782
467
    p = c;
783


467
    U16_NEXT(str, n, value.length(), c);
784
    // Don't count individual emoji codepoints that occur within an
785
    // emoji sequence. This is not necessarily foolproof. Some
786
    // environments display emoji sequences in the appropriate
787
    // condensed form (as a single emoji glyph), other environments
788
    // may not understand an emoji sequence and will display each
789
    // individual emoji separately. When this happens, the width
790
    // calculated will be off, and there's no reliable way of knowing
791
    // in advance if a particular sequence is going to be supported.
792
    // The expand_emoji_sequence option allows the caller to skip this
793
    // check and count each code within an emoji sequence separately.
794

1394
    if (!expand_emoji_sequence &&
795

933
        n > 0 && p == 0x200d &&  // 0x200d == ZWJ (zero width joiner)
796
3
        (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||
797
         u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) {
798
3
      continue;
799
    }
800
464
    width += GetColumnWidth(c, ambiguous_as_full_width);
801
  }
802
182
  args.GetReturnValue().Set(width);
803
}
804
805
4410
void Initialize(Local<Object> target,
806
                Local<Value> unused,
807
                Local<Context> context,
808
                void* priv) {
809
4410
  Environment* env = Environment::GetCurrent(context);
810
4411
  env->SetMethod(target, "toUnicode", ToUnicode);
811
4410
  env->SetMethod(target, "toASCII", ToASCII);
812
4410
  env->SetMethod(target, "getStringWidth", GetStringWidth);
813
814
  // One-shot converters
815
4410
  env->SetMethod(target, "icuErrName", ICUErrorName);
816
4410
  env->SetMethod(target, "transcode", Transcode);
817
818
  // ConverterObject
819
4410
  env->SetMethod(target, "getConverter", ConverterObject::Create);
820
4410
  env->SetMethod(target, "decode", ConverterObject::Decode);
821
4410
  env->SetMethod(target, "hasConverter", ConverterObject::Has);
822
4410
}
823
824
}  // namespace i18n
825
}  // namespace node
826
827
4314
NODE_MODULE_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize)
828
829
#endif  // NODE_HAVE_I18N_SUPPORT