GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_i18n.cc Lines: 377 428 88.1 %
Date: 2022-12-07 04:23:16 Branches: 146 222 65.8 %

Line Branch Exec Source
1
// Copyright Joyent, Inc. and other Node contributors.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a
4
// copy of this software and associated documentation files (the
5
// "Software"), to deal in the Software without restriction, including
6
// without limitation the rights to use, copy, modify, merge, publish,
7
// distribute, sublicense, and/or sell copies of the Software, and to permit
8
// persons to whom the Software is furnished to do so, subject to the
9
// following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included
12
// in all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22
/*
23
 * notes: by srl295
24
 *  - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data
25
 *     ( stubdata/libicudata.a ) containing nothing, no data, and it's also
26
 *    linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT
27
 *    macro names. That's the "english+root" data.
28
 *
29
 *    If icu_data_path is non-null, the user has provided a path and we assume
30
 *    it goes somewhere useful. We set that path in ICU, and exit.
31
 *    If icu_data_path is null, they haven't set a path and we want the
32
 *    "english+root" data.  We call
33
 *       udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...)
34
 *    to load up the english+root data.
35
 *
36
 *  - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full
37
 *    data. All of the variables and command line options for changing data at
38
 *    runtime are disabled, as they wouldn't fully override the internal data.
39
 *    See:  http://bugs.icu-project.org/trac/ticket/10924
40
 */
41
42
43
#include "node_i18n.h"
44
#include "node_external_reference.h"
45
46
#if defined(NODE_HAVE_I18N_SUPPORT)
47
48
#include "base_object-inl.h"
49
#include "node.h"
50
#include "node_buffer.h"
51
#include "node_errors.h"
52
#include "node_internals.h"
53
#include "string_bytes.h"
54
#include "util-inl.h"
55
#include "v8.h"
56
57
#include <unicode/utypes.h>
58
#include <unicode/putil.h>
59
#include <unicode/uchar.h>
60
#include <unicode/uclean.h>
61
#include <unicode/udata.h>
62
#include <unicode/uidna.h>
63
#include <unicode/ucnv.h>
64
#include <unicode/utf8.h>
65
#include <unicode/utf16.h>
66
#include <unicode/timezone.h>
67
#include <unicode/ulocdata.h>
68
#include <unicode/uvernum.h>
69
#include <unicode/uversion.h>
70
#include <unicode/ustring.h>
71
72
#ifdef NODE_HAVE_SMALL_ICU
73
/* if this is defined, we have a 'secondary' entry point.
74
   compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */
75
#define SMALL_ICUDATA_ENTRY_POINT \
76
  SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME)
77
#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff)
78
#ifndef U_LIB_SUFFIX_C_NAME
79
#define SMALL_DEF(major, suff) icusmdt##major##_dat
80
#else
81
#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat
82
#endif
83
84
extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
85
#endif
86
87
namespace node {
88
89
using v8::Context;
90
using v8::FunctionCallbackInfo;
91
using v8::FunctionTemplate;
92
using v8::Int32;
93
using v8::Isolate;
94
using v8::Local;
95
using v8::MaybeLocal;
96
using v8::NewStringType;
97
using v8::Object;
98
using v8::ObjectTemplate;
99
using v8::String;
100
using v8::Value;
101
102
namespace i18n {
103
namespace {
104
105
template <typename T>
106
22
MaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) {
107
22
  MaybeLocal<Object> ret = Buffer::New(env, buf);
108
22
  if (ret.IsEmpty())
109
    return ret;
110
111
  static_assert(sizeof(T) == 1 || sizeof(T) == 2,
112
                "Currently only one- or two-byte buffers are supported");
113
12
  if (sizeof(T) > 1 && IsBigEndian()) {
114
    SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf);
115
    SwapBytes16(retbuf_data, retbuf_length);
116
  }
117
118
22
  return ret;
119
}
120
121
// One-Shot Converters
122
123
2
void CopySourceBuffer(MaybeStackBuffer<UChar>* dest,
124
                      const char* data,
125
                      const size_t length,
126
                      const size_t length_in_chars) {
127
2
  dest->AllocateSufficientStorage(length_in_chars);
128
2
  char* dst = reinterpret_cast<char*>(**dest);
129
2
  memcpy(dst, data, length);
130
2
  if (IsBigEndian()) {
131
    SwapBytes16(dst, length);
132
  }
133
2
}
134
135
// Signature shared by all one-shot transcoders in this file: convert
// |source_length| bytes at |source| from |fromEncoding| to |toEncoding|,
// reporting the ICU result through |status|.
using TranscodeFunc = MaybeLocal<Object> (*)(Environment* env,
                                             const char* fromEncoding,
                                             const char* toEncoding,
                                             const char* source,
                                             const size_t source_length,
                                             UErrorCode* status);
141
142
3
// Generic one-shot transcoder: converts |source| from |fromEncoding| to
// |toEncoding| through ICU's ucnv_convertEx().
//
// Unmappable characters are replaced by '?' repeated to the target
// converter's minimum character size. |status| is reset to U_ZERO_ERROR on
// entry and carries the ICU result on return. The returned MaybeLocal is
// empty when the conversion fails or the result Buffer cannot be created.
MaybeLocal<Object> Transcode(Environment* env,
                             const char* fromEncoding,
                             const char* toEncoding,
                             const char* source,
                             const size_t source_length,
                             UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  MaybeStackBuffer<char> result;
  Converter to(toEncoding);
  Converter from(fromEncoding);

  size_t sublen = ucnv_getMinCharSize(to.conv());
  std::string sub(sublen, '?');
  to.set_subst_chars(sub.c_str());

  // Worst-case output size. Kept as size_t: a 32-bit type would silently
  // truncate the product for large inputs on 64-bit hosts, making the
  // conversion fail with a spurious buffer-overflow error.
  const size_t limit = source_length * to.max_char_size();
  result.AllocateSufficientStorage(limit);
  char* target = *result;
  ucnv_convertEx(to.conv(), from.conv(), &target, target + limit,
                 &source, source + source_length, nullptr, nullptr,
                 nullptr, nullptr, true, true, status);
  if (U_SUCCESS(*status)) {
    // |target| was advanced past the last byte written.
    result.SetLength(target - &result[0]);
    ret = ToBufferEndian(env, &result);
  }
  return ret;
}
170
171
4
// Converts |source| from |fromEncoding| into UTF-16 using ucnv_toUChars().
// The binding below selects this for ASCII and LATIN1 input.
//
// |status| is reset to U_ZERO_ERROR on entry and carries the ICU result on
// return. The returned MaybeLocal is empty when the conversion fails or the
// result Buffer cannot be created. |toEncoding| is unused; it is present only
// to match TranscodeFunc.
MaybeLocal<Object> TranscodeToUcs2(Environment* env,
                                   const char* fromEncoding,
                                   const char* toEncoding,
                                   const char* source,
                                   const size_t source_length,
                                   UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeLocal<Object> ret;
  MaybeStackBuffer<UChar> destbuf(source_length);
  Converter from(fromEncoding);
  // ucnv_toUChars() takes the destination capacity in UChars. Report what
  // |destbuf| really holds; source_length * sizeof(UChar) is a byte count and
  // would overstate the room available for output and the NUL terminator.
  ucnv_toUChars(from.conv(), *destbuf, destbuf.capacity(),
                source, source_length, status);
  if (U_SUCCESS(*status))
    ret = ToBufferEndian(env, &destbuf);
  return ret;
}
188
189
// Converts UTF-16 |source| into |toEncoding| using ucnv_fromUChars().
// Unmappable characters become '?' repeated to the target converter's
// minimum character size. |status| is reset on entry and holds the ICU
// result on return; an empty MaybeLocal signals failure. |fromEncoding| is
// unused and exists only to match TranscodeFunc.
MaybeLocal<Object> TranscodeFromUcs2(Environment* env,
                                     const char* fromEncoding,
                                     const char* toEncoding,
                                     const char* source,
                                     const size_t source_length,
                                     UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> utf16_input;
  Converter to(toEncoding);

  const size_t replacement_size = ucnv_getMinCharSize(to.conv());
  const std::string replacement(replacement_size, '?');
  to.set_subst_chars(replacement.c_str());

  const size_t char_count = source_length / sizeof(UChar);
  CopySourceBuffer(&utf16_input, source, source_length, char_count);

  MaybeStackBuffer<char> destbuf(char_count);
  const uint32_t written = ucnv_fromUChars(to.conv(), *destbuf, char_count,
                                           *utf16_input, char_count, status);
  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(written);
  return ToBufferEndian(env, &destbuf);
}
215
216
2
// Converts UTF-8 |source| into UTF-16 with u_strFromUTF8().
//
// The first attempt writes into whatever capacity |destbuf| starts with. If
// ICU reports U_BUFFER_OVERFLOW_ERROR, the buffer is grown to the length ICU
// asked for and the conversion is repeated once. |status| is reset on entry
// and holds the final ICU result; an empty MaybeLocal signals failure. The
// encoding-name parameters are unused and exist only to match TranscodeFunc.
MaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  MaybeStackBuffer<UChar> destbuf;
  int32_t needed;
  u_strFromUTF8(*destbuf, destbuf.capacity(), &needed,
                source, source_length, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // Second pass with exactly the storage ICU requested.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(needed);
    u_strFromUTF8(*destbuf, needed, &needed,
                  source, source_length, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(needed);
  return ToBufferEndian(env, &destbuf);
}
243
244
2
// Converts UTF-16 |source| into UTF-8 with u_strToUTF8().
//
// The input is first copied (and byte-swapped on big-endian hosts) by
// CopySourceBuffer(). The first conversion attempt uses |destbuf|'s initial
// capacity; on U_BUFFER_OVERFLOW_ERROR the buffer is grown to the length ICU
// asked for and the conversion is repeated once. |status| is reset on entry
// and holds the final ICU result; an empty MaybeLocal signals failure. The
// encoding-name parameters are unused and exist only to match TranscodeFunc.
MaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env,
                                         const char* fromEncoding,
                                         const char* toEncoding,
                                         const char* source,
                                         const size_t source_length,
                                         UErrorCode* status) {
  *status = U_ZERO_ERROR;
  const size_t char_count = source_length / sizeof(UChar);
  MaybeStackBuffer<UChar> utf16_input;
  MaybeStackBuffer<char> destbuf;
  CopySourceBuffer(&utf16_input, source, source_length, char_count);

  int32_t needed;
  u_strToUTF8(*destbuf, destbuf.capacity(), &needed,
              *utf16_input, char_count, status);

  if (*status == U_BUFFER_OVERFLOW_ERROR) {
    // Second pass with exactly the storage ICU requested.
    *status = U_ZERO_ERROR;
    destbuf.AllocateSufficientStorage(needed);
    u_strToUTF8(*destbuf, needed, &needed, *utf16_input,
                char_count, status);
  }

  if (U_FAILURE(*status))
    return MaybeLocal<Object>();

  destbuf.SetLength(needed);
  return ToBufferEndian(env, &destbuf);
}
274
275
22
const char* EncodingName(const enum encoding encoding) {
276

22
  switch (encoding) {
277
2
    case ASCII: return "us-ascii";
278
4
    case LATIN1: return "iso8859-1";
279
10
    case UCS2: return "utf16le";
280
6
    case UTF8: return "utf-8";
281
    default: return nullptr;
282
  }
283
}
284
285
24
bool SupportedEncoding(const enum encoding encoding) {
286
24
  switch (encoding) {
287
22
    case ASCII:
288
    case LATIN1:
289
    case UCS2:
290
22
    case UTF8: return true;
291
2
    default: return false;
292
  }
293
}
294
295
13
void Transcode(const FunctionCallbackInfo<Value>&args) {
296
13
  Environment* env = Environment::GetCurrent(args);
297
13
  Isolate* isolate = env->isolate();
298
13
  UErrorCode status = U_ZERO_ERROR;
299
  MaybeLocal<Object> result;
300
301
13
  ArrayBufferViewContents<char> input(args[0]);
302
13
  const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER);
303
13
  const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER);
304
305

13
  if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) {
306
11
    TranscodeFunc tfn = &Transcode;
307

11
    switch (fromEncoding) {
308
4
      case ASCII:
309
      case LATIN1:
310
4
        if (toEncoding == UCS2)
311
4
          tfn = &TranscodeToUcs2;
312
4
        break;
313
4
      case UTF8:
314
4
        if (toEncoding == UCS2)
315
2
          tfn = &TranscodeUcs2FromUtf8;
316
4
        break;
317
3
      case UCS2:
318
3
        switch (toEncoding) {
319
1
          case UCS2:
320
1
            tfn = &Transcode;
321
1
            break;
322
2
          case UTF8:
323
2
            tfn = &TranscodeUtf8FromUcs2;
324
2
            break;
325
          default:
326
            tfn = &TranscodeFromUcs2;
327
        }
328
3
        break;
329
      default:
330
        // This should not happen because of the SupportedEncoding checks
331
        ABORT();
332
    }
333
334
    result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding),
335
11
                 input.data(), input.length(), &status);
336
  } else {
337
2
    status = U_ILLEGAL_ARGUMENT_ERROR;
338
  }
339
340
13
  if (result.IsEmpty())
341
4
    return args.GetReturnValue().Set(status);
342
343
22
  return args.GetReturnValue().Set(result.ToLocalChecked());
344
}
345
346
2
void ICUErrorName(const FunctionCallbackInfo<Value>& args) {
347
2
  Environment* env = Environment::GetCurrent(args);
348
2
  CHECK(args[0]->IsInt32());
349
4
  UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value());
350
6
  args.GetReturnValue().Set(
351
2
      String::NewFromUtf8(env->isolate(),
352
2
                          u_errorName(status)).ToLocalChecked());
353
2
}
354
355
}  // anonymous namespace
356
357
10
// Opens an ICU converter by |name| and takes ownership of it. Aborts via
// CHECK if ICU cannot open the converter. |sub|, when non-null, is installed
// as the substitution sequence.
Converter::Converter(const char* name, const char* sub) {
  UErrorCode open_status = U_ZERO_ERROR;
  UConverter* opened = ucnv_open(name, &open_status);
  CHECK(U_SUCCESS(open_status));
  conv_.reset(opened);
  set_subst_chars(sub);
}
364
365
589
// Adopts an already-open ICU |converter| and, when |sub| is non-null,
// installs it as the substitution sequence.
Converter::Converter(UConverter* converter, const char* sub)
    : conv_(converter) {
  this->set_subst_chars(sub);
}
369
370
1191
void Converter::set_subst_chars(const char* sub) {
371
1191
  CHECK(conv_);
372
1191
  UErrorCode status = U_ZERO_ERROR;
373
1191
  if (sub != nullptr) {
374
592
    ucnv_setSubstChars(conv_.get(), sub, strlen(sub), &status);
375
592
    CHECK(U_SUCCESS(status));
376
  }
377
1191
}
378
379
606
void Converter::reset() {
380
606
  ucnv_reset(conv_.get());
381
606
}
382
383
1504
size_t Converter::min_char_size() const {
384
1504
  CHECK(conv_);
385
1504
  return ucnv_getMinCharSize(conv_.get());
386
}
387
388
3
size_t Converter::max_char_size() const {
389
3
  CHECK(conv_);
390
3
  return ucnv_getMaxCharSize(conv_.get());
391
}
392
393
2
void ConverterObject::Has(const FunctionCallbackInfo<Value>& args) {
394
2
  Environment* env = Environment::GetCurrent(args);
395
396
2
  CHECK_GE(args.Length(), 1);
397
4
  Utf8Value label(env->isolate(), args[0]);
398
399
2
  UErrorCode status = U_ZERO_ERROR;
400
2
  ConverterPointer conv(ucnv_open(*label, &status));
401
4
  args.GetReturnValue().Set(!!U_SUCCESS(status));
402
2
}
403
404
589
void ConverterObject::Create(const FunctionCallbackInfo<Value>& args) {
405
589
  Environment* env = Environment::GetCurrent(args);
406
407
589
  Local<ObjectTemplate> t = env->i18n_converter_template();
408
  Local<Object> obj;
409
1178
  if (!t->NewInstance(env->context()).ToLocal(&obj)) return;
410
411
589
  CHECK_GE(args.Length(), 2);
412
589
  Utf8Value label(env->isolate(), args[0]);
413
589
  int flags = args[1]->Uint32Value(env->context()).ToChecked();
414
589
  bool fatal =
415
589
      (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL;
416
417
589
  UErrorCode status = U_ZERO_ERROR;
418
589
  UConverter* conv = ucnv_open(*label, &status);
419
589
  if (U_FAILURE(status))
420
    return;
421
422
589
  if (fatal) {
423
357
    status = U_ZERO_ERROR;
424
357
    ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP,
425
                        nullptr, nullptr, nullptr, &status);
426
  }
427
428
589
  auto converter = new ConverterObject(env, obj, conv, flags);
429
589
  size_t sublen = ucnv_getMinCharSize(conv);
430
589
  std::string sub(sublen, '?');
431
589
  converter->set_subst_chars(sub.c_str());
432
433
1178
  args.GetReturnValue().Set(obj);
434
}
435
436
1510
void ConverterObject::Decode(const FunctionCallbackInfo<Value>& args) {
437
1510
  Environment* env = Environment::GetCurrent(args);
438
439
1510
  CHECK_GE(args.Length(), 4);  // Converter, Buffer, Flags, Encoding
440
441
  ConverterObject* converter;
442
4433
  ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>());
443
444

4006
  if (!(args[1]->IsArrayBuffer() || args[1]->IsSharedArrayBuffer() ||
445

2757
        args[1]->IsArrayBufferView())) {
446
6
    return node::THROW_ERR_INVALID_ARG_TYPE(
447
        env->isolate(),
448
        "The \"input\" argument must be an instance of SharedArrayBuffer, "
449
6
        "ArrayBuffer or ArrayBufferView.");
450
  }
451
452
1504
  ArrayBufferViewContents<char> input(args[1]);
453
3008
  int flags = args[2]->Uint32Value(env->context()).ToChecked();
454
455
3008
  CHECK(args[3]->IsString());
456
1504
  Local<String> from_encoding = args[3].As<String>();
457
458
1504
  UErrorCode status = U_ZERO_ERROR;
459
1504
  MaybeStackBuffer<UChar> result;
460
461
1504
  UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH;
462
463
  // When flushing the final chunk, the limit is the maximum
464
  // of either the input buffer length or the number of pending
465
  // characters times the min char size, multiplied by 2 as unicode may
466
  // take up to 2 UChars to encode a character
467
1504
  size_t limit = 2 * converter->min_char_size() *
468
2110
      (!flush ?
469
898
          input.length() :
470
          std::max(
471
2716
              input.length(),
472
606
              static_cast<size_t>(
473
1212
                  ucnv_toUCountPending(converter->conv(), &status))));
474
1504
  status = U_ZERO_ERROR;
475
476
1504
  if (limit > 0)
477
1310
    result.AllocateSufficientStorage(limit);
478
479
1504
  auto cleanup = OnScopeLeave([&]() {
480
1504
    if (flush) {
481
      // Reset the converter state.
482
606
      converter->set_bom_seen(false);
483
606
      converter->reset();
484
    }
485
1504
  });
486
487
1504
  const char* source = input.data();
488
1504
  size_t source_length = input.length();
489
490
1504
  UChar* target = *result;
491
1504
  ucnv_toUnicode(converter->conv(),
492
                 &target,
493
1504
                 target + limit,
494
                 &source,
495
                 source + source_length,
496
                 nullptr,
497
                 flush,
498
                 &status);
499
500
1504
  if (U_SUCCESS(status)) {
501
1407
    bool omit_initial_bom = false;
502
1407
    if (limit > 0) {
503
1213
      result.SetLength(target - &result[0]);
504
2222
      if (result.length() > 0 &&
505
1009
          converter->unicode() &&
506

3131
          !converter->ignore_bom() &&
507
909
          !converter->bom_seen()) {
508
        // If the very first result in the stream is a BOM, and we are not
509
        // explicitly told to ignore it, then we mark it for discarding.
510
422
        if (result[0] == 0xFEFF)
511
32
          omit_initial_bom = true;
512
422
        converter->set_bom_seen(true);
513
      }
514
    }
515
516
    Local<Value> error;
517
1407
    UChar* output = result.out();
518
1407
    size_t beginning = 0;
519
1407
    size_t length = result.length() * sizeof(UChar);
520
521
1407
    if (omit_initial_bom) {
522
      // Perform `ret = ret.slice(2)`.
523
32
      beginning += 2;
524
32
      length -= 2;
525
    }
526
527
1407
    char* value = reinterpret_cast<char*>(output) + beginning;
528
529
1407
    if (IsBigEndian()) {
530
      SwapBytes16(value, length);
531
    }
532
533
    MaybeLocal<Value> encoded =
534
1407
        StringBytes::Encode(env->isolate(), value, length, UCS2, &error);
535
536
    Local<Value> ret;
537
1407
    if (encoded.ToLocal(&ret)) {
538
1407
      args.GetReturnValue().Set(ret);
539
1407
      return;
540
    }
541
  }
542
543
97
  node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
544
      env->isolate(),
545
      "The encoded data was not valid for encoding %s",
546
194
      *node::Utf8Value(env->isolate(), from_encoding));
547
}
548
549
589
// Wraps |converter| in a weak BaseObject bound to |wrap|. |flags| is stored
// as given, with CONVERTER_FLAGS_UNICODE added when the converter's ICU type
// is UTF-8, UTF-16BE or UTF-16LE.
ConverterObject::ConverterObject(
    Environment* env,
    Local<Object> wrap,
    UConverter* converter,
    int flags,
    const char* sub)
    : BaseObject(env, wrap),
      Converter(converter, sub),
      flags_(flags) {
  MakeWeak();

  const UConverterType type = ucnv_getType(converter);
  const bool is_unicode = type == UCNV_UTF8 ||
                          type == UCNV_UTF16_BigEndian ||
                          type == UCNV_UTF16_LittleEndian;
  if (is_unicode) {
    flags_ |= CONVERTER_FLAGS_UNICODE;
  }
}
571
572
573
5674
bool InitializeICUDirectory(const std::string& path) {
574
5674
  UErrorCode status = U_ZERO_ERROR;
575
5674
  if (path.empty()) {
576
#ifdef NODE_HAVE_SMALL_ICU
577
    // install the 'small' data.
578
    udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status);
579
#else  // !NODE_HAVE_SMALL_ICU
580
    // no small data, so nothing to do.
581
#endif  // !NODE_HAVE_SMALL_ICU
582
  } else {
583
    u_setDataDirectory(path.c_str());
584
    u_init(&status);
585
  }
586
5674
  return status == U_ZERO_ERROR;
587
}
588
589
void SetDefaultTimeZone(const char* tzid) {
590
  size_t tzidlen = strlen(tzid) + 1;
591
  UErrorCode status = U_ZERO_ERROR;
592
  MaybeStackBuffer<UChar, 256> id(tzidlen);
593
  u_charsToUChars(tzid, id.out(), tzidlen);
594
  // This is threadsafe:
595
  ucal_setDefaultTimeZone(id.out(), &status);
596
  CHECK(U_SUCCESS(status));
597
}
598
599
384
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
600
                  const char* input,
601
                  size_t length) {
602
384
  UErrorCode status = U_ZERO_ERROR;
603
384
  uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE;
604
384
  UIDNA* uidna = uidna_openUTS46(options, &status);
605
384
  if (U_FAILURE(status))
606
    return -1;
607
384
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
608
609
384
  int32_t len = uidna_nameToUnicodeUTF8(uidna,
610
                                        input, length,
611
384
                                        **buf, buf->capacity(),
612
                                        &info,
613
                                        &status);
614
615
  // Do not check info.errors like we do with ToASCII since ToUnicode always
616
  // returns a string, despite any possible errors that may have occurred.
617
618
384
  if (status == U_BUFFER_OVERFLOW_ERROR) {
619
    status = U_ZERO_ERROR;
620
    buf->AllocateSufficientStorage(len);
621
    len = uidna_nameToUnicodeUTF8(uidna,
622
                                  input, length,
623
                                  **buf, buf->capacity(),
624
                                  &info,
625
                                  &status);
626
  }
627
628
  // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode
629
  // string, regardless of whether an error occurred.
630
631
384
  if (U_FAILURE(status)) {
632
    len = -1;
633
    buf->SetLength(0);
634
  } else {
635
384
    buf->SetLength(len);
636
  }
637
638
384
  uidna_close(uidna);
639
384
  return len;
640
}
641
642
14732
int32_t ToASCII(MaybeStackBuffer<char>* buf,
643
                const char* input,
644
                size_t length,
645
                idna_mode mode) {
646
14732
  UErrorCode status = U_ZERO_ERROR;
647
14732
  uint32_t options =                  // CheckHyphens = false; handled later
648
    UIDNA_CHECK_BIDI |                // CheckBidi = true
649
    UIDNA_CHECK_CONTEXTJ |            // CheckJoiners = true
650
    UIDNA_NONTRANSITIONAL_TO_ASCII;   // Nontransitional_Processing
651
14732
  if (mode == idna_mode::kStrict) {
652
    options |= UIDNA_USE_STD3_RULES;  // UseSTD3ASCIIRules = beStrict
653
                                      // VerifyDnsLength = beStrict;
654
                                      //   handled later
655
  }
656
657
14732
  UIDNA* uidna = uidna_openUTS46(options, &status);
658
14732
  if (U_FAILURE(status))
659
    return -1;
660
14732
  UIDNAInfo info = UIDNA_INFO_INITIALIZER;
661
662
14732
  int32_t len = uidna_nameToASCII_UTF8(uidna,
663
                                       input, length,
664
14732
                                       **buf, buf->capacity(),
665
                                       &info,
666
                                       &status);
667
668
14732
  if (status == U_BUFFER_OVERFLOW_ERROR) {
669
2
    status = U_ZERO_ERROR;
670
2
    buf->AllocateSufficientStorage(len);
671
2
    len = uidna_nameToASCII_UTF8(uidna,
672
                                 input, length,
673
2
                                 **buf, buf->capacity(),
674
                                 &info,
675
                                 &status);
676
  }
677
678
  // In UTS #46 which specifies ToASCII, certain error conditions are
679
  // configurable through options, and the WHATWG URL Standard promptly elects
680
  // to disable some of them to accommodate for real-world use cases.
681
  // Unfortunately, ICU4C's IDNA module does not support disabling some of
682
  // these options through `options` above, and thus continues throwing
683
  // unnecessary errors. To counter this situation, we just filter out the
684
  // errors that may have happened afterwards, before deciding whether to
685
  // return an error from this function.
686
687
  // CheckHyphens = false
688
  // (Specified in the current UTS #46 draft rev. 18.)
689
  // Refs:
690
  // - https://github.com/whatwg/url/issues/53
691
  // - https://github.com/whatwg/url/pull/309
692
  // - http://www.unicode.org/review/pri317/
693
  // - http://www.unicode.org/reports/tr46/tr46-18.html
694
  // - https://www.icann.org/news/announcement-2000-01-07-en
695
14732
  info.errors &= ~UIDNA_ERROR_HYPHEN_3_4;
696
14732
  info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
697
14732
  info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
698
699
14732
  if (mode != idna_mode::kStrict) {
700
    // VerifyDnsLength = beStrict
701
14732
    info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
702
14732
    info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
703
14732
    info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
704
  }
705
706


14732
  if (U_FAILURE(status) || (mode != idna_mode::kLenient && info.errors != 0)) {
707
142
    len = -1;
708
142
    buf->SetLength(0);
709
  } else {
710
14590
    buf->SetLength(len);
711
  }
712
713
14732
  uidna_close(uidna);
714
14732
  return len;
715
}
716
717
189
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
718
189
  Environment* env = Environment::GetCurrent(args);
719
189
  CHECK_GE(args.Length(), 1);
720
378
  CHECK(args[0]->IsString());
721
189
  Utf8Value val(env->isolate(), args[0]);
722
723
189
  MaybeStackBuffer<char> buf;
724
189
  int32_t len = ToUnicode(&buf, *val, val.length());
725
726
189
  if (len < 0) {
727
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode");
728
  }
729
730
567
  args.GetReturnValue().Set(
731
189
      String::NewFromUtf8(env->isolate(),
732
189
                          *buf,
733
                          NewStringType::kNormal,
734
189
                          len).ToLocalChecked());
735
}
736
737
10451
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
738
10451
  Environment* env = Environment::GetCurrent(args);
739
10451
  CHECK_GE(args.Length(), 1);
740
20902
  CHECK(args[0]->IsString());
741
10451
  Utf8Value val(env->isolate(), args[0]);
742
  // optional arg
743
10451
  bool lenient = args[1]->BooleanValue(env->isolate());
744
10451
  idna_mode mode = lenient ? idna_mode::kLenient : idna_mode::kDefault;
745
746
10451
  MaybeStackBuffer<char> buf;
747
10451
  int32_t len = ToASCII(&buf, *val, val.length(), mode);
748
749
10451
  if (len < 0) {
750
9
    return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII");
751
  }
752
753
31326
  args.GetReturnValue().Set(
754
10442
      String::NewFromUtf8(env->isolate(),
755
10442
                          *buf,
756
                          NewStringType::kNormal,
757
10442
                          len).ToLocalChecked());
758
}
759
760
// This is similar to wcwidth except that it takes the current unicode
761
// character properties database into consideration, allowing it to
762
// correctly calculate the column widths of things like emojis and
763
// newer wide characters. wcwidth, on the other hand, uses a fixed
764
// algorithm that does not take things like emoji into proper
765
// consideration.
766
//
767
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by
768
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs
769
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't
770
// allow it to be input. Linux's PTY terminal prints control characters as
771
// Narrow rhombi.
772
//
773
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final
774
// consonants are 0-width when combined with initial consonants; otherwise they
775
// are technically Wide. But many terminals (including Konsole and
776
// VTE/GLib-based) implement all medials and finals as 0-width.
777
//
778
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width
779
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420
780
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223
781
1216874
static int GetColumnWidth(UChar32 codepoint,
782
                          bool ambiguous_as_full_width = false) {
783
  // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
784
  // codepoint as being full width, wide, ambiguous, neutral, narrow,
785
  // or halfwidth.
786
1216874
  const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH);
787

1216874
  switch (eaw) {
788
67460
    case U_EA_FULLWIDTH:
789
    case U_EA_WIDE:
790
67460
      return 2;
791
1008488
    case U_EA_AMBIGUOUS:
792
      // See: http://www.unicode.org/reports/tr11/#Ambiguous for details
793
1008488
      if (ambiguous_as_full_width) {
794
        return 2;
795
      }
796
      // If ambiguous_as_full_width is false:
797
      // Fall through
798
    case U_EA_NEUTRAL:
799
1136955
      if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) {
800
        return 2;
801
      }
802
      // Fall through
803
    case U_EA_HALFWIDTH:
804
    case U_EA_NARROW:
805
    default:
806
1149414
      const auto zero_width_mask = U_GC_CC_MASK |  // C0/C1 control code
807
                                  U_GC_CF_MASK |  // Format control character
808
                                  U_GC_ME_MASK |  // Enclosing mark
809
                                  U_GC_MN_MASK;   // Nonspacing mark
810

2298798
      if (codepoint != 0x00AD &&  // SOFT HYPHEN is Cf but not zero-width
811
1293549
          ((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
812
1149384
          u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
813
1005219
        return 0;
814
      }
815
144195
      return 1;
816
  }
817
}
818
819
// Returns the column width for the given String.
820
1195424
static void GetStringWidth(const FunctionCallbackInfo<Value>& args) {
821
1195424
  Environment* env = Environment::GetCurrent(args);
822
2390848
  CHECK(args[0]->IsString());
823
824
1195424
  bool ambiguous_as_full_width = args[1]->IsTrue();
825

1195424
  bool expand_emoji_sequence = !args[2]->IsBoolean() || args[2]->IsTrue();
826
827
1195424
  TwoByteValue value(env->isolate(), args[0]);
828
  // reinterpret_cast is required by windows to compile
829
1195424
  UChar* str = reinterpret_cast<UChar*>(*value);
830
  static_assert(sizeof(*str) == sizeof(**value),
831
                "sizeof(*str) == sizeof(**value)");
832
1195424
  UChar32 c = 0;
833
  UChar32 p;
834
1195424
  size_t n = 0;
835
1195424
  uint32_t width = 0;
836
837
2412298
  while (n < value.length()) {
838
1216874
    p = c;
839


1216874
    U16_NEXT(str, n, value.length(), c);
840
    // Don't count individual emoji codepoints that occur within an
841
    // emoji sequence. This is not necessarily foolproof. Some
842
    // environments display emoji sequences in the appropriate
843
    // condensed form (as a single emoji glyph), other environments
844
    // may not understand an emoji sequence and will display each
845
    // individual emoji separately. When this happens, the width
846
    // calculated will be off, and there's no reliable way of knowing
847
    // in advance if a particular sequence is going to be supported.
848
    // The expand_emoji_sequence option allows the caller to skip this
849
    // check and count each code within an emoji sequence separately.
850
    // https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
851
2433748
    if (!expand_emoji_sequence &&
852


1216874
        n > 0 && p == 0x200d &&  // 0x200d == ZWJ (zero width joiner)
853
        (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) ||
854
         u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) {
855
      continue;
856
    }
857
1216874
    width += GetColumnWidth(c, ambiguous_as_full_width);
858
  }
859
2390848
  args.GetReturnValue().Set(width);
860
1195424
}
861
862
800
void Initialize(Local<Object> target,
863
                Local<Value> unused,
864
                Local<Context> context,
865
                void* priv) {
866
800
  Environment* env = Environment::GetCurrent(context);
867
800
  SetMethod(context, target, "toUnicode", ToUnicode);
868
800
  SetMethod(context, target, "toASCII", ToASCII);
869
800
  SetMethod(context, target, "getStringWidth", GetStringWidth);
870
871
  // One-shot converters
872
800
  SetMethod(context, target, "icuErrName", ICUErrorName);
873
800
  SetMethod(context, target, "transcode", Transcode);
874
875
  // ConverterObject
876
  {
877
800
    Local<FunctionTemplate> t = NewFunctionTemplate(env->isolate(), nullptr);
878
800
    t->Inherit(BaseObject::GetConstructorTemplate(env));
879
1600
    t->InstanceTemplate()->SetInternalFieldCount(
880
        ConverterObject::kInternalFieldCount);
881
    Local<String> converter_string =
882
800
        FIXED_ONE_BYTE_STRING(env->isolate(), "Converter");
883
800
    t->SetClassName(converter_string);
884
800
    env->set_i18n_converter_template(t->InstanceTemplate());
885
  }
886
887
800
  SetMethod(context, target, "getConverter", ConverterObject::Create);
888
800
  SetMethod(context, target, "decode", ConverterObject::Decode);
889
800
  SetMethod(context, target, "hasConverter", ConverterObject::Has);
890
800
}
891
892
5639
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  // The same callbacks, in the same order, that Initialize() installs via
  // SetMethod().
  for (auto* callback : {ToUnicode,
                         ToASCII,
                         GetStringWidth,
                         ICUErrorName,
                         Transcode,
                         ConverterObject::Create,
                         ConverterObject::Decode,
                         ConverterObject::Has}) {
    registry->Register(callback);
  }
}
902
903
}  // namespace i18n
904
}  // namespace node
905
906
5710
// Associates node::i18n::Initialize with the binding name `icu`.
// NOTE(review): the macro is defined elsewhere; presumably it registers an
// internal, context-aware binding -- confirm against its definition.
NODE_BINDING_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize)
907
5639
// Associates node::i18n::RegisterExternalReferences with the binding name
// `icu`.
// NOTE(review): the macro is defined elsewhere; presumably it hooks the
// function into external-reference registration -- confirm against its
// definition.
NODE_BINDING_EXTERNAL_REFERENCE(icu, node::i18n::RegisterExternalReferences)
908
909
#endif  // NODE_HAVE_I18N_SUPPORT