GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage/nodes/benchmark/out/../src/node_i18n.cc Lines: 332 381 87.1 %
Date: 2017-12-18 Branches: 153 236 64.8 %

Line Branch Exec Source
1
// Copyright Joyent, Inc. and other Node contributors.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a
4
// copy of this software and associated documentation files (the
5
// "Software"), to deal in the Software without restriction, including
6
// without limitation the rights to use, copy, modify, merge, publish,
7
// distribute, sublicense, and/or sell copies of the Software, and to permit
8
// persons to whom the Software is furnished to do so, subject to the
9
// following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included
12
// in all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22
/*
23
 * notes: by srl295
24
 *  - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data
25
 *     ( stubdata/libicudata.a ) containing nothing, no data, and it's also
26
 *    linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT
27
 *    macro names. That's the "english+root" data.
28
 *
29
 *    If icu_data_path is non-null, the user has provided a path and we assume
30
 *    it goes somewhere useful. We set that path in ICU, and exit.
31
 *    If icu_data_path is null, they haven't set a path and we want the
32
 *    "english+root" data.  We call
33
 *       udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...)
34
 *    to load up the english+root data.
35
 *
36
 *  - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full
37
 *    data. All of the variables and command line options for changing data at
38
 *    runtime are disabled, as they wouldn't fully override the internal data.
39
 *    See:  http://bugs.icu-project.org/trac/ticket/10924
40
 */
41
42
43
#include "node_i18n.h"
44
45
#if defined(NODE_HAVE_I18N_SUPPORT)
46
47
#include "node.h"
48
#include "node_buffer.h"
49
#include "env-inl.h"
50
#include "util-inl.h"
51
#include "base_object-inl.h"
52
#include "v8.h"
53
54
#include <unicode/utypes.h>
55
#include <unicode/putil.h>
56
#include <unicode/uchar.h>
57
#include <unicode/uclean.h>
58
#include <unicode/udata.h>
59
#include <unicode/uidna.h>
60
#include <unicode/ucnv.h>
61
#include <unicode/utf8.h>
62
#include <unicode/utf16.h>
63
#include <unicode/timezone.h>
64
#include <unicode/ulocdata.h>
65
#include <unicode/uvernum.h>
66
#include <unicode/uversion.h>
67
#include <unicode/ustring.h>
68
69
#ifdef NODE_HAVE_SMALL_ICU
70
/* if this is defined, we have a 'secondary' entry point.
71
   compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
72
#define SMALL_ICUDATA_ENTRY_POINT \
73
  SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME)
74
#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff)
75
#ifndef U_LIB_SUFFIX_C_NAME
76
#define SMALL_DEF(major, suff) icusmdt##major##_dat
77
#else
78
#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat
79
#endif
80
81
extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
82
#endif
83
84
namespace node {
85
86
using v8::Context;
87
using v8::FunctionCallbackInfo;
88
using v8::HandleScope;
89
using v8::Isolate;
90
using v8::Local;
91
using v8::MaybeLocal;
92
using v8::Object;
93
using v8::ObjectTemplate;
94
using v8::String;
95
using v8::Value;
96
97
namespace i18n {
98
namespace {
99
100
template <typename T>
101
339
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) {
102
339
  MaybeLocal<Object> ret = Buffer::New(env, buf);
103

339
  if (ret.IsEmpty())
104
    return ret;
105
106
  static_assert(sizeof(T) == 1 || sizeof(T) == 2,
107
                "Currently only one- or two-byte buffers are supported");
108

339
  if (sizeof(T) > 1 && IsBigEndian()) {
109
    SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
110
    SwapBytes16(retbuf_data, retbuf_length);
111
  }
112
113
339
  return ret;
114
}
115
116
struct Converter {
117
384
  explicit Converter(const char* name, const char* sub = nullptr)
118
384
      : conv(nullptr) {
119
384
    UErrorCode status = U_ZERO_ERROR;
120
384
    conv = ucnv_open(name, &status);
121
384
    CHECK(U_SUCCESS(status));
122
384
    if (sub != nullptr) {
123
2
      ucnv_setSubstChars(conv, sub, strlen(sub), &status);
124
    }
125
384
  }
126
127
125
  explicit Converter(UConverter* converter,
128
125
                     const char* sub = nullptr) : conv(converter) {
129
125
    CHECK_NE(conv, nullptr);
130
125
    UErrorCode status = U_ZERO_ERROR;
131
125
    if (sub != nullptr) {
132
      ucnv_setSubstChars(conv, sub, strlen(sub), &status);
133
    }
134
125
  }
135
136
384
  ~Converter() {
137
384
    ucnv_close(conv);
138
384
  }
139
140
  UConverter* conv;
141
};
142
143
class ConverterObject : public BaseObject, Converter {
144
 public:
145
  enum ConverterFlags {
146
    CONVERTER_FLAGS_FLUSH      = 0x1,
147
    CONVERTER_FLAGS_FATAL      = 0x2,
148
    CONVERTER_FLAGS_IGNORE_BOM = 0x4
149
  };
150
151
  ~ConverterObject() override {}
152
153
  static void Has(const FunctionCallbackInfo<Value>& args) {
154
    Environment* env = Environment::GetCurrent(args);
155
    HandleScope scope(env->isolate());
156
157
    CHECK_GE(args.Length(), 1);
158
    Utf8Value label(env->isolate(), args[0]);
159
160
    UErrorCode status = U_ZERO_ERROR;
161
    UConverter* conv = ucnv_open(*label, &status);
162
    args.GetReturnValue().Set(!!U_SUCCESS(status));
163
    ucnv_close(conv);
164
  }
165
166
125
  static void Create(const FunctionCallbackInfo<Value>& args) {
167
125
    Environment* env = Environment::GetCurrent(args);
168
125
    HandleScope scope(env->isolate());
169
170
125
    CHECK_GE(args.Length(), 2);
171
250
    Utf8Value label(env->isolate(), args[0]);
172
500
    int flags = args[1]->Uint32Value(env->context()).ToChecked();
173
    bool fatal =
174
125
        (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
175
    bool ignoreBOM =
176
125
        (flags & CONVERTER_FLAGS_IGNORE_BOM) == CONVERTER_FLAGS_IGNORE_BOM;
177
178
125
    UErrorCode status = U_ZERO_ERROR;
179
125
    UConverter* conv = ucnv_open(*label, &status);
180
125
    if (U_FAILURE(status))
181
125
      return;
182
183
125
    if (fatal) {
184
50
      status = U_ZERO_ERROR;
185
      ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
186
50
                          nullptr, nullptr, nullptr, &status);
187
    }
188
189
125
    Local<ObjectTemplate> t = ObjectTemplate::New(env->isolate());
190
125
    t->SetInternalFieldCount(1);
191
375
    Local<Object> obj = t->NewInstance(env->context()).ToLocalChecked();
192
125
    new ConverterObject(env, obj, conv, ignoreBOM);
193
375
    args.GetReturnValue().Set(obj);
194
  }
195
196
376
  static void Decode(const FunctionCallbackInfo<Value>& args) {
197
376
    Environment* env = Environment::GetCurrent(args);
198
199
376
    CHECK_GE(args.Length(), 3);  // Converter, Buffer, Flags
200
201
376
    Converter utf8("utf8");
202
    ConverterObject* converter;
203
1128
    ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
204

3008
    SPREAD_BUFFER_ARG(args[1], input_obj);
205
1504
    int flags = args[2]->Uint32Value(env->context()).ToChecked();
206
207
376
    UErrorCode status = U_ZERO_ERROR;
208
752
    MaybeStackBuffer<UChar> result;
209
    MaybeLocal<Object> ret;
210
376
    size_t limit = ucnv_getMinCharSize(converter->conv) *
211
376
                   input_obj_length;
212
376
    if (limit > 0)
213
361
      result.AllocateSufficientStorage(limit);
214
215
376
    UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
216
217
376
    const char* source = input_obj_data;
218
376
    size_t source_length = input_obj_length;
219
220

376
    if (converter->unicode_ && !converter->ignoreBOM_ && !converter->bomSeen_) {
221
109
      int32_t bomOffset = 0;
222
109
      ucnv_detectUnicodeSignature(source, source_length, &bomOffset, &status);
223
109
      source += bomOffset;
224
109
      source_length -= bomOffset;
225
109
      converter->bomSeen_ = true;
226
    }
227
228
376
    UChar* target = *result;
229
    ucnv_toUnicode(converter->conv,
230
                   &target, target + (limit * sizeof(UChar)),
231
                   &source, source + source_length,
232
376
                   nullptr, flush, &status);
233
234
376
    if (U_SUCCESS(status)) {
235
329
      if (limit > 0)
236
314
        result.SetLength(target - &result[0]);
237
329
      ret = ToBufferEndian(env, &result);
238
658
      args.GetReturnValue().Set(ret.ToLocalChecked());
239
329
      goto reset;
240
    }
241
242
141
    args.GetReturnValue().Set(status);
243
244
   reset:
245
376
    if (flush) {
246
      // Reset the converter state
247
118
      converter->bomSeen_ = false;
248
118
      ucnv_reset(converter->conv);
249
376
    }
250
  }
251
252
 protected:
253
125
  ConverterObject(Environment* env,
254
                  v8::Local<v8::Object> wrap,
255
                  UConverter* converter,
256
                  bool ignoreBOM,
257
                  const char* sub = nullptr) :
258
                  BaseObject(env, wrap),
259
                  Converter(converter, sub),
260
125
                  ignoreBOM_(ignoreBOM) {
261
125
    MakeWeak<ConverterObject>(this);
262
263
125
    switch (ucnv_getType(converter)) {
264
      case UCNV_UTF8:
265
      case UCNV_UTF16_BigEndian:
266
      case UCNV_UTF16_LittleEndian:
267
125
        unicode_ = true;
268
125
        break;
269
      default:
270
        unicode_ = false;
271
    }
272
125
  }
273
274
 private:
275
  bool unicode_ = false;     // True if this is a Unicode converter
276
  bool ignoreBOM_ = false;   // True if the BOM should be ignored on Unicode
277
  bool bomSeen_ = false;     // True if the BOM has been seen
278
};
279
280
// One-Shot Converters
281
282
2
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
283
                      const char* data,
284
                      const size_t length,
285
                      const size_t length_in_chars) {
286
2
  dest->AllocateSufficientStorage(length_in_chars);
287
2
  char* dst = reinterpret_cast<char*>(**dest);
288
2
  memcpy(dst, data, length);
289
2
  if (IsBigEndian()) {
290
    SwapBytes16(dst, length);
291
  }
292
2
}
293
294
// Signature shared by all one-shot transcoding routines: convert
// `source_length` bytes at `source` from `fromEncoding` to `toEncoding`,
// reporting the ICU result through `status`.
using TranscodeFunc = MaybeLocal<Object> (*)(Environment* env,
                                             const char* fromEncoding,
                                             const char* toEncoding,
                                             const char* source,
                                             const size_t source_length,
                                             UErrorCode* status);
300
301
2
// Generic one-shot conversion through ucnv_convertEx(). The target converter
// is opened with "?" as its substitution sequence. Returns an empty
// MaybeLocal when ICU reports a failure in `status`.
MaybeLocal<Object> Transcode(Environment* env,
                             const char* fromEncoding,
                             const char* toEncoding,
                             const char* source,
                             const size_t source_length,
                             UErrorCode* status) {
  *status = U_ZERO_ERROR;

  MaybeStackBuffer<char> out;
  Converter to(toEncoding, "?");
  Converter from(fromEncoding);

  // Output capacity: input length times the target's maximum char size.
  const uint32_t capacity = source_length * ucnv_getMaxCharSize(to.conv);
  out.AllocateSufficientStorage(capacity);

  char* cursor = *out;
  char* const out_end = cursor + capacity;
  const char* const source_end = source + source_length;
  ucnv_convertEx(to.conv, from.conv, &cursor, out_end,
                 &source, source_end, nullptr, nullptr,
                 nullptr, nullptr, true, true, status);

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  // `cursor` was advanced by ICU past the last byte written.
  out.SetLength(cursor - &out[0]);
  return ToBufferEndian(env, &out);
}
324
325
4
// Converts `source` from `fromEncoding` to UTF-16 with ucnv_toUChars().
// `toEncoding` is not consulted. Returns an empty MaybeLocal when ICU
// reports a failure in `status`.
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
                                   const char* fromEncoding,
                                   const char* toEncoding,
                                   const char* source,
                                   const size_t source_length,
                                   UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  MaybeStackBuffer<UChar> destbuf(source_length);
  Converter from(fromEncoding);
  // ucnv_toUChars() takes the destination capacity as a count of UChars, so
  // hand it the real capacity of `destbuf`. Overstating it (for example by
  // multiplying with sizeof(UChar)) would let ICU write past the buffer.
  const int32_t len = ucnv_toUChars(from.conv, *destbuf, destbuf.capacity(),
                                    source, source_length, status);
  if (U_SUCCESS(*status)) {
    // Use the length ICU actually produced.
    destbuf.SetLength(len);
    ret = ToBufferEndian(env, &destbuf);
  }
  return ret;
}
342
343
// Converts UTF-16 input to `toEncoding` with ucnv_fromUChars(), using "?"
// as the substitution sequence. `fromEncoding` is not consulted. The output
// buffer is sized at one byte per input code unit. Returns an empty
// MaybeLocal when ICU reports a failure in `status`.
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
                                     const char* fromEncoding,
                                     const char* toEncoding,
                                     const char* source,
                                     const size_t source_length,
                                     UErrorCode* status) {
  *status = U_ZERO_ERROR;

  const size_t length_in_chars = source_length / sizeof(UChar);

  MaybeStackBuffer<UChar> utf16;
  Converter to(toEncoding, "?");
  CopySourceBuffer(&utf16, source, source_length, length_in_chars);

  MaybeStackBuffer<char> encoded(length_in_chars);
  const uint32_t written = ucnv_fromUChars(to.conv, *encoded, length_in_chars,
                                           *utf16, length_in_chars, status);
  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  encoded.SetLength(written);
  return ToBufferEndian(env, &encoded);
}
364
365
2
// Converts UTF-8 input to UTF-16 with u_strFromUTF8(). The first attempt
// uses the buffer's current capacity; if ICU answers with
// U_BUFFER_OVERFLOW_ERROR the buffer is grown to the length ICU reported and
// the conversion is run once more. The encoding name arguments are not
// consulted. Returns an empty MaybeLocal on failure.
MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> utf16;
  int32_t needed;

  u_strFromUTF8(*utf16, utf16.capacity(), &needed,
                source, source_length, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // `needed` now holds the required length; retry with enough room.
    *status = U_ZERO_ERROR;
    utf16.AllocateSufficientStorage(needed);
    u_strFromUTF8(*utf16, needed, &needed,
                  source, source_length, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  utf16.SetLength(needed);
  return ToBufferEndian(env, &utf16);
}
392
393
2
// Converts UTF-16 input to UTF-8 with u_strToUTF8(). The input bytes are
// first copied into a UChar buffer via CopySourceBuffer(). If the first
// attempt ends in U_BUFFER_OVERFLOW_ERROR the output buffer is grown to the
// length ICU reported and the conversion is run once more. The encoding name
// arguments are not consulted. Returns an empty MaybeLocal on failure.
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  const size_t length_in_chars = source_length / sizeof(UChar);
  int32_t needed;

  MaybeStackBuffer<UChar> utf16;
  MaybeStackBuffer<char> utf8;
  CopySourceBuffer(&utf16, source, source_length, length_in_chars);

  u_strToUTF8(*utf8, utf8.capacity(), &needed,
              *utf16, length_in_chars, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // `needed` now holds the required length; retry with enough room.
    *status = U_ZERO_ERROR;
    utf8.AllocateSufficientStorage(needed);
    u_strToUTF8(*utf8, needed, &needed, *utf16,
                length_in_chars, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  utf8.SetLength(needed);
  return ToBufferEndian(env, &utf8);
}
423
424
20
const char* EncodingName(const enum encoding encoding) {
425

20
  switch (encoding) {
426
2
    case ASCII: return "us-ascii";
427
4
    case LATIN1: return "iso8859-1";
428
8
    case UCS2: return "utf16le";
429
6
    case UTF8: return "utf-8";
430
    default: return nullptr;
431
  }
432
}
433
434
22
bool SupportedEncoding(const enum encoding encoding) {
435
22
  switch (encoding) {
436
    case ASCII:
437
    case LATIN1:
438
    case UCS2:
439
20
    case UTF8: return true;
440
2
    default: return false;
441
  }
442
}
443
444
12
void Transcode(const FunctionCallbackInfo<Value>&args) {
445
12
  Environment* env = Environment::GetCurrent(args);
446
12
  Isolate* isolate = env->isolate();
447
12
  UErrorCode status = U_ZERO_ERROR;
448
  MaybeLocal<Object> result;
449
450
12
  THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
451

96
  SPREAD_BUFFER_ARG(args[0], ts_obj);
452
12
  const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER);
453
12
  const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER);
454
455

12
  if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
456
10
    TranscodeFunc tfn = &Transcode;
457

10
    switch (fromEncoding) {
458
      case ASCII:
459
      case LATIN1:
460
4
        if (toEncoding == UCS2)
461
4
          tfn = &TranscodeToUcs2;
462
4
        break;
463
      case UTF8:
464
4
        if (toEncoding == UCS2)
465
2
          tfn = &TranscodeUcs2FromUtf8;
466
4
        break;
467
      case UCS2:
468
2
        switch (toEncoding) {
469
          case UCS2:
470
            tfn = &Transcode;
471
            break;
472
          case UTF8:
473
2
            tfn = &TranscodeUtf8FromUcs2;
474
2
            break;
475
          default:
476
            tfn = &TranscodeFromUcs2;
477
        }
478
2
        break;
479
      default:
480
        // This should not happen because of the SupportedEncoding checks
481
        ABORT();
482
    }
483
484
    result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding),
485
10
                 ts_obj_data, ts_obj_length, &status);
486
  } else {
487
2
    status = U_ILLEGAL_ARGUMENT_ERROR;
488
  }
489
490
12
  if (result.IsEmpty())
491
6
    return args.GetReturnValue().Set(status);
492
493
20
  return args.GetReturnValue().Set(result.ToLocalChecked());
494
}
495
496
2
void ICUErrorName(const FunctionCallbackInfo<Value>& args) {
497
2
  Environment* env = Environment::GetCurrent(args);
498
4
  UErrorCode status = static_cast<UErrorCode>(args[0]->Int32Value());
499
  args.GetReturnValue().Set(
500
      String::NewFromUtf8(env->isolate(),
501
                          u_errorName(status),
502
6
                          v8::NewStringType::kNormal).ToLocalChecked());
503
2
}
504
505
#define TYPE_ICU "icu"
506
#define TYPE_UNICODE "unicode"
507
#define TYPE_CLDR "cldr"
508
#define TYPE_TZ "tz"
509
510
/**
511
 * This is the workhorse function that deals with the actual version info.
512
 * Get an ICU version.
513
 * @param type the type of version to get. One of VERSION_TYPES
514
 * @param buf optional buffer for result
515
 * @param status ICU error status. If failure, assume result is undefined.
516
 * @return version number, or NULL. May or may not be buf.
517
 */
518
13392
const char* GetVersion(const char* type,
519
                       char buf[U_MAX_VERSION_STRING_LENGTH],
520
                       UErrorCode* status) {
521
13392
  if (!strcmp(type, TYPE_ICU)) {
522
3348
    return U_ICU_VERSION;
523
10044
  } else if (!strcmp(type, TYPE_UNICODE)) {
524
3348
    return U_UNICODE_VERSION;
525
6696
  } else if (!strcmp(type, TYPE_TZ)) {
526
3348
    return TimeZone::getTZDataVersion(*status);
527
3348
  } else if (!strcmp(type, TYPE_CLDR)) {
528
    UVersionInfo versionArray;
529
3348
    ulocdata_getCLDRVersion(versionArray, status);
530
3348
    if (U_SUCCESS(*status)) {
531
3348
      u_versionToString(versionArray, buf);
532
3348
      return buf;
533
    }
534
  }
535
  // Fall through - unknown type or error case
536
  return nullptr;
537
}
538
539
16740
void GetVersion(const FunctionCallbackInfo<Value>& args) {
540
16740
  Environment* env = Environment::GetCurrent(args);
541
16740
  if ( args.Length() == 0 ) {
542
    // With no args - return a comma-separated list of allowed values
543
      args.GetReturnValue().Set(
544
          String::NewFromUtf8(env->isolate(),
545
            TYPE_ICU ","
546
            TYPE_UNICODE ","
547
            TYPE_CLDR ","
548
10044
            TYPE_TZ));
549
  } else {
550
13392
    CHECK_GE(args.Length(), 1);
551
40176
    CHECK(args[0]->IsString());
552
13392
    Utf8Value val(env->isolate(), args[0]);
553
13392
    UErrorCode status = U_ZERO_ERROR;
554
13392
    char buf[U_MAX_VERSION_STRING_LENGTH] = "";  // Possible output buffer.
555
13392
    const char* versionString = GetVersion(*val, buf, &status);
556
557

13392
    if (U_SUCCESS(status) && versionString) {
558
      // Success.
559
      args.GetReturnValue().Set(
560
          String::NewFromUtf8(env->isolate(),
561
40176
          versionString));
562
13392
    }
563
  }
564
16740
}
565
566
}  // anonymous namespace
567
568
3357
bool InitializeICUDirectory(const std::string& path) {
569
3357
  UErrorCode status = U_ZERO_ERROR;
570
3357
  if (path.empty()) {
571
#ifdef NODE_HAVE_SMALL_ICU
572
    // install the 'small' data.
573
3355
    udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status);
574
#else  // !NODE_HAVE_SMALL_ICU
575
    // no small data, so nothing to do.
576
#endif  // !NODE_HAVE_SMALL_ICU
577
  } else {
578
2
    u_setDataDirectory(path.c_str());
579
2
    u_init(&status);
580
  }
581
3357
  return status == U_ZERO_ERROR;
582
}
583
584
381
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
585
                  const char* input,
586
                  size_t length) {
587
381
  UErrorCode status = U_ZERO_ERROR;
588
381
  uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
589
381
  UIDNA* uidna = uidna_openUTS46(options, &status);
590
381
  if (U_FAILURE(status))
591
    return -1;
592
381
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
593
594
  int32_t len = uidna_nameToUnicodeUTF8(uidna,
595
                                        input, length,
596
381
                                        **buf, buf->capacity(),
597
                                        &info,
598
381
                                        &status);
599
600
  // Do not check info.errors like we do with ToASCII since ToUnicode always
601
  // returns a string, despite any possible errors that may have occurred.
602
603
381
  if (status == U_BUFFER_OVERFLOW_ERROR) {
604
    status = U_ZERO_ERROR;
605
    buf->AllocateSufficientStorage(len);
606
    len = uidna_nameToUnicodeUTF8(uidna,
607
                                  input, length,
608
                                  **buf, buf->capacity(),
609
                                  &info,
610
                                  &status);
611
  }
612
613
  // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
614
  // string, regardless of whether an error occurred.
615
616
381
  if (U_FAILURE(status)) {
617
    len = -1;
618
    buf->SetLength(0);
619
  } else {
620
381
    buf->SetLength(len);
621
  }
622
623
381
  uidna_close(uidna);
624
381
  return len;
625
}
626
627
10049
int32_t ToASCII(MaybeStackBuffer<char>* buf,
628
                const char* input,
629
                size_t length,
630
                enum idna_mode mode) {
631
10049
  UErrorCode status = U_ZERO_ERROR;
632
  uint32_t options =                  // CheckHyphens = false; handled later
633
    UIDNA_CHECK_BIDI |                // CheckBidi = true
634
    UIDNA_CHECK_CONTEXTJ |            // CheckJoiners = true
635
10049
    UIDNA_NONTRANSITIONAL_TO_ASCII;   // Nontransitional_Processing
636
10049
  if (mode == IDNA_STRICT) {
637
    options |= UIDNA_USE_STD3_RULES;  // UseSTD3ASCIIRules = beStrict
638
                                      // VerifyDnsLength = beStrict;
639
                                      //   handled later
640
  }
641
642
10049
  UIDNA* uidna = uidna_openUTS46(options, &status);
643
10049
  if (U_FAILURE(status))
644
    return -1;
645
10049
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
646
647
  int32_t len = uidna_nameToASCII_UTF8(uidna,
648
                                       input, length,
649
10049
                                       **buf, buf->capacity(),
650
                                       &info,
651
10049
                                       &status);
652
653
10049
  if (status == U_BUFFER_OVERFLOW_ERROR) {
654
    status = U_ZERO_ERROR;
655
    buf->AllocateSufficientStorage(len);
656
    len = uidna_nameToASCII_UTF8(uidna,
657
                                 input, length,
658
                                 **buf, buf->capacity(),
659
                                 &info,
660
                                 &status);
661
  }
662
663
  // In UTS #46 which specifies ToASCII, certain error conditions are
664
  // configurable through options, and the WHATWG URL Standard promptly elects
665
  // to disable some of them to accommodate for real-world use cases.
666
  // Unfortunately, ICU4C's IDNA module does not support disabling some of
667
  // these options through `options` above, and thus continues throwing
668
  // unnecessary errors. To counter this situation, we just filter out the
669
  // errors that may have happened afterwards, before deciding whether to
670
  // return an error from this function.
671
672
  // CheckHyphens = false
673
  // (Specified in the current UTS #46 draft rev. 18.)
674
  // Refs:
675
  // - https://github.com/whatwg/url/issues/53
676
  // - https://github.com/whatwg/url/pull/309
677
  // - http://www.unicode.org/review/pri317/
678
  // - http://www.unicode.org/reports/tr46/tr46-18.html
679
  // - https://www.icann.org/news/announcement-2000-01-07-en
680
10049
  info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
681
10049
  info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
682
10049
  info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
683
684
10049
  if (mode != IDNA_STRICT) {
685
    // VerifyDnsLength = beStrict
686
10049
    info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
687
10049
    info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
688
10049
    info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
689
  }
690
691


10049
  if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
692
70
    len = -1;
693
70
    buf->SetLength(0);
694
  } else {
695
9979
    buf->SetLength(len);
696
  }
697
698
10049
  uidna_close(uidna);
699
10049
  return len;
700
}
701
702
189
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
703
189
  Environment* env = Environment::GetCurrent(args);
704
189
  CHECK_GE(args.Length(), 1);
705
567
  CHECK(args[0]->IsString());
706
189
  Utf8Value val(env->isolate(), args[0]);
707
708
378
  MaybeStackBuffer<char> buf;
709
189
  int32_t len = ToUnicode(&buf, *val, val.length());
710
711
189
  if (len < 0) {
712
189
    return env->ThrowError("Cannot convert name to Unicode");
713
  }
714
715
  args.GetReturnValue().Set(
716
      String::NewFromUtf8(env->isolate(),
717
189
                          *buf,
718
                          v8::NewStringType::kNormal,
719
756
                          len).ToLocalChecked());
720
}
721
722
8234
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
723
8234
  Environment* env = Environment::GetCurrent(args);
724
8234
  CHECK_GE(args.Length(), 1);
725
24702
  CHECK(args[0]->IsString());
726
8234
  Utf8Value val(env->isolate(), args[0]);
727
  // optional arg
728
32936
  bool lenient = args[1]->BooleanValue(env->context()).FromJust();
729
8234
  enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
730
731
16459
  MaybeStackBuffer<char> buf;
732
8234
  int32_t len = ToASCII(&buf, *val, val.length(), mode);
733
734
8234
  if (len < 0) {
735
8243
    return env->ThrowError("Cannot convert name to ASCII");
736
  }
737
738
  args.GetReturnValue().Set(
739
      String::NewFromUtf8(env->isolate(),
740
8225
                          *buf,
741
                          v8::NewStringType::kNormal,
742
32900
                          len).ToLocalChecked());
743
}
744
745
// This is similar to wcwidth except that it takes the current unicode
746
// character properties database into consideration, allowing it to
747
// correctly calculate the column widths of things like emojis and
748
// newer wide characters. wcwidth, on the other hand, uses a fixed
749
// algorithm that does not take things like emoji into proper
750
// consideration.
751
//
752
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by
753
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs
754
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't
755
// allow it to be input. Linux's PTY terminal prints control characters as
756
// Narrow rhombi.
757
//
758
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final
759
// consonants are 0-width when combined with initial consonants; otherwise they
760
// are technically Wide. But many terminals (including Konsole and
761
// VTE/GLib-based) implement all medials and finals as 0-width.
762
//
763
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width
764
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420
765
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223
766
116453
static int GetColumnWidth(UChar32 codepoint,
767
                          bool ambiguous_as_full_width = false) {
768
  const auto zero_width_mask = U_GC_CC_MASK |  // C0/C1 control code
769
                               U_GC_CF_MASK |  // Format control character
770
                               U_GC_ME_MASK |  // Enclosing mark
771
116453
                               U_GC_MN_MASK;   // Nonspacing mark
772

232961
  if (codepoint != 0x00AD &&  // SOFT HYPHEN is Cf but not zero-width
773
232849
      ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
774
116397
       u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
775
56
    return 0;
776
  }
777
778
  // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
779
  // codepoint as being full width, wide, ambiguous, neutral, narrow,
780
  // or halfwidth.
781
116397
  const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);
782

116397
  switch (eaw) {
783
    case U_EA_FULLWIDTH:
784
    case U_EA_WIDE:
785
50
      return 2;
786
    case U_EA_AMBIGUOUS:
787
      // See: http://www.unicode.org/reports/tr11/#Ambiguous for details
788
4
      if (ambiguous_as_full_width) {
789
1
        return 2;
790
      }
791
      // Fall through if ambiguous_as_full_width if false.
792
    case U_EA_NEUTRAL:
793
26
      if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
794
        return 2;
795
      }
796
      // Fall through
797
    case U_EA_HALFWIDTH:
798
    case U_EA_NARROW:
799
    default:
800
116346
      return 1;
801
  }
802
}
803
804
// Returns the column width for the given String.
805
116114
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
806
116114
  Environment* env = Environment::GetCurrent(args);
807
116114
  if (args.Length() < 1)
808
116029
    return;
809
810
232228
  bool ambiguous_as_full_width = args[1]->BooleanValue();
811
232228
  bool expand_emoji_sequence = args[2]->BooleanValue();
812
813
232228
  if (args[0]->IsNumber()) {
814
    args.GetReturnValue().Set(
815
348087
        GetColumnWidth(args[0]->Uint32Value(),
816
464116
                       ambiguous_as_full_width));
817
116029
    return;
818
  }
819
820
85
  TwoByteValue value(env->isolate(), args[0]);
821
  // reinterpret_cast is required by windows to compile
822
85
  UChar* str = reinterpret_cast<UChar*>(*value);
823
  static_assert(sizeof(*str) == sizeof(**value),
824
                "sizeof(*str) == sizeof(**value)");
825
85
  UChar32 c = 0;
826
  UChar32 p;
827
85
  size_t n = 0;
828
85
  uint32_t width = 0;
829
830
597
  while (n < value.length()) {
831
427
    p = c;
832


427
    U16_NEXT(str, n, value.length(), c);
833
    // Don't count individual emoji codepoints that occur within an
834
    // emoji sequence. This is not necessarily foolproof. Some
835
    // environments display emoji sequences in the appropriate
836
    // condensed form (as a single emoji glyph), other environments
837
    // may not understand an emoji sequence and will display each
838
    // individual emoji separately. When this happens, the width
839
    // calculated will be off, and there's no reliable way of knowing
840
    // in advance if a particular sequence is going to be supported.
841
    // The expand_emoji_sequence option allows the caller to skip this
842
    // check and count each code within an emoji sequence separately.
843

1274
    if (!expand_emoji_sequence &&
844

853
        n > 0 && p == 0x200d &&  // 0x200d == ZWJ (zero width joiner)
845
3
        (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||
846
         u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) {
847
3
      continue;
848
    }
849
424
    width += GetColumnWidth(c, ambiguous_as_full_width);
850
  }
851
170
  args.GetReturnValue().Set(width);
852
}
853
854
3348
void Init(Local<Object> target,
855
          Local<Value> unused,
856
          Local<Context> context,
857
          void* priv) {
858
3348
  Environment* env = Environment::GetCurrent(context);
859
3348
  env->SetMethod(target, "toUnicode", ToUnicode);
860
3348
  env->SetMethod(target, "toASCII", ToASCII);
861
3348
  env->SetMethod(target, "getStringWidth", GetStringWidth);
862
3348
  env->SetMethod(target, "getVersion", GetVersion);
863
864
  // One-shot converters
865
3348
  env->SetMethod(target, "icuErrName", ICUErrorName);
866
3348
  env->SetMethod(target, "transcode", Transcode);
867
868
  // ConverterObject
869
3348
  env->SetMethod(target, "getConverter", ConverterObject::Create);
870
3348
  env->SetMethod(target, "decode", ConverterObject::Decode);
871
3348
  env->SetMethod(target, "hasConverter", ConverterObject::Has);
872
3348
}
873
874
}  // namespace i18n
875
}  // namespace node
876
877
3391
// Registers node::i18n::Init under the module name `icu`.
// NOTE(review): the macro is defined outside this file; presumably it makes
// Init the initializer of the context-aware built-in module — confirm
// against its definition.
NODE_BUILTIN_MODULE_CONTEXT_AWARE(icu, node::i18n::Init)
878
879
#endif  // NODE_HAVE_I18N_SUPPORT