GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1148 1207 95.1 %
Date: 2021-09-08 04:14:13 Branches: 988 1120 88.2 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <string>
11
#include <vector>
12
13
namespace node {
14
15
using errors::TryCatchScope;
16
17
using url::table_data::hex;
18
using url::table_data::C0_CONTROL_ENCODE_SET;
19
using url::table_data::FRAGMENT_ENCODE_SET;
20
using url::table_data::PATH_ENCODE_SET;
21
using url::table_data::USERINFO_ENCODE_SET;
22
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
23
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
24
25
using v8::Array;
26
using v8::Context;
27
using v8::Function;
28
using v8::FunctionCallbackInfo;
29
using v8::HandleScope;
30
using v8::Int32;
31
using v8::Integer;
32
using v8::Isolate;
33
using v8::Local;
34
using v8::MaybeLocal;
35
using v8::NewStringType;
36
using v8::Null;
37
using v8::Object;
38
using v8::String;
39
using v8::Undefined;
40
using v8::Value;
41
42
125510
// Creates a V8 string from the UTF-8 bytes held in `str`.
// ToLocalChecked() is used on the result, so a failed allocation is treated
// as fatal rather than reported to the caller.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
48
49
namespace url {
50
namespace {
51
52
// Sentinel standing in for "end of input" when a parser reads one character
// past the last byte of its buffer.
// NOTE(review): on targets where `char` is signed, an input byte of 0xFF
// compares equal to this value — confirm callers cannot observe such a byte.
// https://url.spec.whatwg.org/#eof-code-point
53
constexpr char kEOL = -1;
54
55
// Used in ToUSVString().
56
constexpr char16_t kUnicodeReplacementCharacter = 0xFFFD;
57
58
// A parsed URL host: a domain, an opaque host, an IPv4 address or an IPv6
// address. A freshly constructed object is in the "failed" state; the Parse*
// methods move it to one of the other states on success.
// https://url.spec.whatwg.org/#concept-host
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no host has been stored (initial state, or parsing failed).
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage for the active representation; `type_` says which member is live.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    // Intentionally empty: the owning URLHost destroys the string member
    // itself (see Reset()) because only it knows which member is active.
    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the live one and returns the object
  // to the "failed" state.
  void Reset() {
    const bool holds_string =
        type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
    if (holds_string)
      value_.domain_or_opaque.~basic_string();
    type_ = HostType::H_FAILED;
  }

  // Setting the string members of the union with = is brittle because
  // it relies on them being initialized to a state that requires no
  // destruction of old data.
  // For a long time, that worked well enough because ParseIPv6Host() happens
  // to zero-fill `value_`, but that really is relying on standard library
  // internals too much.
  // These helpers are the easiest solution but we might want to consider
  // just not forcing strings into a union.
  void SetOpaque(std::string&& string) {
    Reset();
    type_ = HostType::H_OPAQUE;
    // Placement-new: the union member is not alive after Reset().
    new (&value_.domain_or_opaque) std::string(std::move(string));
  }

  void SetDomain(std::string&& string) {
    Reset();
    type_ = HostType::H_DOMAIN;
    // Placement-new: the union member is not alive after Reset().
    new (&value_.domain_or_opaque) std::string(std::move(string));
  }
};

// Releases the string member, if any, via Reset().
URLHost::~URLHost() {
  Reset();
}
134
135
// X-macro listing the argument slots passed to the URL parse callback, in
// positional order.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)                                                            \
  XX(ARG_COUNT)  // This one has to be last.

// X-macro listing the argument slots passed to the URL error callback.
#define ERR_ARGS(XX)                                                          \
  XX(ERR_ARG_FLAGS)                                                           \
  XX(ERR_ARG_INPUT)

// Positional indices generated from ARGS; ARG_COUNT ends up equal to the
// number of real slots because it is listed last.
enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};

// Positional indices generated from ERR_ARGS.
enum url_error_cb_args {
#define XX(name) name,
  ERR_ARGS(XX)
#undef XX
};
162
163
// Generates `bool name(ch)`: a predicate over one character of any type that
// is at least `min_bits` wide. `predicate` is an expression written in terms
// of `ch`.
#define CHAR_TEST(min_bits, name, predicate)                                  \
  template <typename T>                                                       \
  bool name(const T ch) {                                                     \
    static_assert(sizeof(ch) >= (min_bits) / 8,                               \
                  "Character must be wider than " #min_bits " bits");         \
    return (predicate);                                                       \
  }

// Generates two overloads of `name`: one taking two characters (`ch1`,
// `ch2`) and one taking a string, which applies the test to its first two
// characters and is false for strings shorter than two characters.
#define TWO_CHAR_STRING_TEST(min_bits, name, predicate)                       \
  template <typename T>                                                       \
  bool name(const T ch1, const T ch2) {                                       \
    static_assert(sizeof(ch1) >= (min_bits) / 8,                              \
                  "Character must be wider than " #min_bits " bits");         \
    return (predicate);                                                       \
  }                                                                           \
  template <typename T>                                                       \
  bool name(const std::basic_string<T>& str) {                                \
    static_assert(sizeof(str[0]) >= (min_bits) / 8,                           \
                  "Character must be wider than " #min_bits " bits");         \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               (ch >= 'A' && ch <= 'F') ||
                               (ch >= 'a' && ch <= 'f')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
                            (ch >= 'a' && ch <= 'z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))

// Maps 'A'..'Z' to 'a'..'z' and returns every other value unchanged.
// https://infra.spec.whatwg.org/#ascii-lowercase
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  // Upper- and lower-case ASCII letters differ only in bit 0x20.
  return ch | 0x20;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
          ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
          ch == '^' || ch == '|')

// https://url.spec.whatwg.org/#windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && ch2 == ':'))

// If a UTF-16 character is a low/trailing surrogate.
CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00)

// If a UTF-16 character is a surrogate.
CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800)

// If a UTF-16 surrogate is a low/trailing one. Only meaningful for a value
// that is already known to be a surrogate.
CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)

#undef CHAR_TEST
#undef TWO_CHAR_STRING_TEST
239
240
241
10561160
// Returns whether bit `i` is set in the bitmap `a`, where bit 0 is the least
// significant bit of a[0], bit 8 the least significant bit of a[1], etc.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t containing_byte = a[i / 8];
  const unsigned mask = 1u << (i % 8);
  return (containing_byte & mask) != 0;
}
244
245
// Appends ch to str. If ch position in encode_set is set, the ch will
246
// be percent-encoded then appended.
247
10561160
void AppendOrEscape(std::string* str,
248
                    const unsigned char ch,
249
                    const uint8_t encode_set[]) {
250
10561160
  if (BitAt(encode_set, ch))
251
1914
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
252
  else
253
10559246
    *str += ch;
254
10561160
}
255
256
// Converts one hexadecimal digit character to its numeric value (0-15).
// Returns static_cast<unsigned>(-1) when `ch` is not a hexadecimal digit.
template <typename T>
unsigned hex2bin(const T ch) {
  if (ch >= '0' && ch <= '9')
    return ch - '0';
  // Setting bit 0x20 folds 'A'..'F' onto 'a'..'f'; no other value lands in
  // that range, so one comparison covers both cases.
  const auto folded = ch | 0x20;
  if (folded >= 'a' && folded <= 'f')
    return 10 + (folded - 'a');
  return static_cast<unsigned>(-1);
}
266
267
3874
std::string PercentDecode(const char* input, size_t len) {
268
3874
  std::string dest;
269
3874
  if (len == 0)
270
2
    return dest;
271
3872
  dest.reserve(len);
272
3872
  const char* pointer = input;
273
3872
  const char* end = input + len;
274
275
88196
  while (pointer < end) {
276
84324
    const char ch = pointer[0];
277
84324
    size_t remaining = end - pointer - 1;
278


84761
    if (ch != '%' || remaining < 2 ||
279
437
        (ch == '%' &&
280
437
         (!IsASCIIHexDigit(pointer[1]) ||
281
433
          !IsASCIIHexDigit(pointer[2])))) {
282
83899
      dest += ch;
283
83899
      pointer++;
284
83899
      continue;
285
    } else {
286
425
      unsigned a = hex2bin(pointer[1]);
287
425
      unsigned b = hex2bin(pointer[2]);
288
425
      char c = static_cast<char>(a * 16 + b);
289
425
      dest += c;
290
425
      pointer += 3;
291
    }
292
  }
293
3872
  return dest;
294
}
295
296
// X-macro table of the special schemes:
//   XX(identifier, default port (-1 if none), scheme string with ':').
#define SPECIALS(XX)                                                          \
  XX(ftp, 21, "ftp:")                                                         \
  XX(file, -1, "file:")                                                       \
  XX(http, 80, "http:")                                                       \
  XX(https, 443, "https:")                                                    \
  XX(ws, 80, "ws:")                                                           \
  XX(wss, 443, "wss:")

// Returns true when `scheme` (including its trailing ':') is one of the
// entries in SPECIALS.
bool IsSpecial(const std::string& scheme) {
  // Expands to one `scheme == "<name>" ||` term per table entry.
#define V(_, __, name) (scheme == name) ||
  return SPECIALS(V) false;
#undef V
}
310
311
122355
// Returns the per-environment string for the special scheme `scheme`.
// Reaches UNREACHABLE() when `scheme` is not an entry in SPECIALS, so callers
// must only pass special schemes.
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define V(key, _, name)                                                       \
  if (scheme == name) {                                                       \
    return env->url_special_##key##_string();                                 \
  }
  SPECIALS(V)
#undef V
  UNREACHABLE();
}
318
319
119244
int NormalizePort(const std::string& scheme, int p) {
320
#define V(_, port, name) if (scheme == name && p == port) return -1;
321









119244
  SPECIALS(V);
322
#undef V
323
8656
  return p;
324
}
325
326
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
327
3368
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
328
3368
  size_t length = end - p;
329
3083
  return length >= 2 &&
330

6487
    IsWindowsDriveLetter(p[0], p[1]) &&
331
36
    (length == 2 ||
332
36
      p[2] == '/' ||
333
14
      p[2] == '\\' ||
334
6
      p[2] == '?' ||
335
3372
      p[2] == '#');
336
}
337
338
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs `input` through i18n::ToUnicode() and stores the result in `*output`.
// Returns false, leaving `*output` untouched, when the conversion reports an
// error. `output` may point at `input`.
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool ok =
      i18n::ToUnicode(&converted, input.c_str(), input.length()) >= 0;
  if (ok)
    output->assign(*converted, converted.length());
  return ok;
}

// Runs `input` through i18n::ToASCII() and stores the result in `*output`.
// Returns false, leaving `*output` untouched, when the conversion reports an
// error or produces an empty string. `output` may point at `input`.
bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  if (i18n::ToASCII(&converted, input.c_str(), input.length()) < 0 ||
      converted.length() == 0) {
    return false;
  }
  output->assign(*converted, converted.length());
  return true;
}
#else
// Intentional non-ops if ICU is not present: the input is copied through
// unchanged and the call always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
  *output = input;
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  *output = input;
  return true;
}
#endif
368
369
#define NS_IN6ADDRSZ 16
370
371
137
void URLHost::ParseIPv6Host(const char* input, size_t length) {
372
137
  CHECK_EQ(type_, HostType::H_FAILED);
373
374
  unsigned char buf[sizeof(struct in6_addr)];
375
137
  MaybeStackBuffer<char> ipv6(length + 1);
376
137
  *(*ipv6 + length) = 0;
377
137
  memset(buf, 0, sizeof(buf));
378
137
  memcpy(*ipv6, input, sizeof(const char) * length);
379
380
137
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
381
382
137
  if (ret != 0) {
383
92
    return;
384
  }
385
386
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
387
405
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
388
360
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
389
  }
390
391
45
  type_ = HostType::H_IPV6;
392
}
393
394
3736
// Parses the characters in [start, end) as one part of an IPv4 address.
//
// A leading "0x"/"0X" selects hexadecimal, any other leading '0' followed by
// more characters selects octal, and everything else is decimal. An empty
// digit sequence (including a bare "0x") yields 0.
//
// Returns the parsed value, or -1 if any character is not a valid digit for
// the selected radix. Values too large for int64_t saturate at INT64_MAX.
//
// Only bytes inside [start, end) are read; the value is accumulated during
// validation, so the result does not depend on what follows `end`.
int64_t ParseNumber(const char* start, const char* end) {
  int64_t radix = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    radix = 16;
  }
  if (end - start == 0) {
    return 0;
  } else if (radix == 10 && end - start > 1 && start[0] == '0') {
    start++;
    radix = 8;
  }

  int64_t value = 0;
  for (const char* p = start; p < end; p++) {
    const char ch = p[0];
    int64_t digit;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
      digit = 10 + (ch - 'a');
    else if (ch >= 'A' && ch <= 'F')
      digit = 10 + (ch - 'A');
    else
      return -1;
    if (digit >= radix)
      return -1;

    // Saturate instead of overflowing (signed overflow is undefined).
    if (value > (INT64_MAX - digit) / radix)
      value = INT64_MAX;
    else
      value = value * radix + digit;
  }
  return value;
}
428
429
3538
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
430
3538
  CHECK_EQ(type_, HostType::H_FAILED);
431
3538
  *is_ipv4 = false;
432
3538
  const char* pointer = input;
433
3538
  const char* mark = input;
434
3538
  const char* end = pointer + length;
435
3538
  int parts = 0;
436
3538
  uint32_t val = 0;
437
  uint64_t numbers[4];
438
3538
  int tooBigNumbers = 0;
439
3538
  if (length == 0)
440
3458
    return;
441
442
32383
  while (pointer <= end) {
443
32279
    const char ch = pointer < end ? pointer[0] : kEOL;
444
32279
    int64_t remaining = end - pointer - 1;
445

32279
    if (ch == '.' || ch == kEOL) {
446
3752
      if (++parts > static_cast<int>(arraysize(numbers)))
447
4
        return;
448
3748
      if (pointer == mark)
449
12
        return;
450
3736
      int64_t n = ParseNumber(mark, pointer);
451
3736
      if (n < 0)
452
3414
        return;
453
454
322
      if (n > 255) {
455
112
        tooBigNumbers++;
456
      }
457
322
      numbers[parts - 1] = n;
458
322
      mark = pointer + 1;
459

322
      if (ch == '.' && remaining == 0)
460
4
        break;
461
    }
462
28845
    pointer++;
463
  }
464
108
  CHECK_GT(parts, 0);
465
108
  *is_ipv4 = true;
466
467
  // If any but the last item in numbers is greater than 255, return failure.
468
  // If the last item in numbers is greater than or equal to
469
  // 256^(5 - the number of items in numbers), return failure.
470
104
  if (tooBigNumbers > 1 ||
471

272
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
472
100
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
473
28
    return;
474
  }
475
476
80
  type_ = HostType::H_IPV4;
477
80
  val = static_cast<uint32_t>(numbers[parts - 1]);
478
196
  for (int n = 0; n < parts - 1; n++) {
479
116
    double b = 3 - n;
480
116
    val +=
481
116
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
482
  }
483
484
80
  value_.ipv4 = val;
485
}
486
487
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
488
520
  CHECK_EQ(type_, HostType::H_FAILED);
489
520
  std::string output;
490
520
  output.reserve(length);
491
3053
  for (size_t i = 0; i < length; i++) {
492
2595
    const char ch = input[i];
493

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
494
62
      return;
495
    } else {
496
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
497
    }
498
  }
499
500
458
  SetOpaque(std::move(output));
501
}
502
503
4526
void URLHost::ParseHost(const char* input,
504
                        size_t length,
505
                        bool is_special,
506
                        bool unicode) {
507
4526
  CHECK_EQ(type_, HostType::H_FAILED);
508
4526
  const char* pointer = input;
509
510
4526
  if (length == 0)
511
1096
    return;
512
513
4526
  if (pointer[0] == '[') {
514
145
    if (pointer[length - 1] != ']')
515
8
      return;
516
137
    return ParseIPv6Host(++pointer, length - 2);
517
  }
518
519
4381
  if (!is_special)
520
520
    return ParseOpaqueHost(input, length);
521
522
  // First, we have to percent decode
523
3861
  std::string decoded = PercentDecode(input, length);
524
525
  // Then we have to punycode toASCII
526
3861
  if (!ToASCII(decoded, &decoded))
527
148
    return;
528
529
  // If any of the following characters are still present, we have to fail
530
86743
  for (size_t n = 0; n < decoded.size(); n++) {
531
83205
    const char ch = decoded[n];
532
83205
    if (IsForbiddenHostCodePoint(ch)) {
533
175
      return;
534
    }
535
  }
536
537
  // Check to see if it's an IPv4 IP address
538
  bool is_ipv4;
539
3538
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
540
3538
  if (is_ipv4)
541
108
    return;
542
543
  // If the unicode flag is set, run the result through punycode ToUnicode
544

3430
  if (unicode && !ToUnicode(decoded, &decoded))
545
    return;
546
547
  // It's not an IPv4 or IPv6 address, it must be a domain
548
3430
  SetDomain(std::move(decoded));
549
}
550
551
// Locates the longest run of zero elements in values[0..len) so that an IPv6
// address can be serialized with "::" compression.
// Returns a pointer to the first element of that run, or nullptr when no run
// is at least two elements long. When several runs share the greatest
// length, the earliest one is returned.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best_start = nullptr;
  unsigned best_length = 1;  // A run must be strictly longer than this.

  T* run_start = nullptr;
  unsigned run_length = 0;

  // One extra iteration (index == len) closes a run that reaches the end.
  for (size_t index = 0; index <= len; index++) {
    if (index < len && values[index] == 0) {
      if (run_length == 0)
        run_start = &values[index];
      run_length++;
      continue;
    }
    if (run_length > best_length) {
      best_length = run_length;
      best_start = run_start;
    }
    run_length = 0;
    run_start = nullptr;
  }
  return best_start;
}
581
582
4013
std::string URLHost::ToStringMove() {
583
4013
  std::string return_value;
584
4013
  switch (type_) {
585
3888
    case HostType::H_DOMAIN:
586
    case HostType::H_OPAQUE:
587
3888
      return_value = std::move(value_.domain_or_opaque);
588
3888
      break;
589
125
    default:
590
125
      return_value = ToString();
591
125
      break;
592
  }
593
4013
  Reset();
594
4013
  return return_value;
595
}
596
597
125
std::string URLHost::ToString() const {
598
250
  std::string dest;
599

125
  switch (type_) {
600
    case HostType::H_DOMAIN:
601
    case HostType::H_OPAQUE:
602
      return value_.domain_or_opaque;
603
      break;
604
80
    case HostType::H_IPV4: {
605
80
      dest.reserve(15);
606
80
      uint32_t value = value_.ipv4;
607
400
      for (int n = 0; n < 4; n++) {
608
        char buf[4];
609
320
        snprintf(buf, sizeof(buf), "%d", value % 256);
610
320
        dest.insert(0, buf);
611
320
        if (n < 3)
612
240
          dest.insert(0, 1, '.');
613
320
        value /= 256;
614
      }
615
80
      break;
616
    }
617
45
    case HostType::H_IPV6: {
618
45
      dest.reserve(41);
619
45
      dest += '[';
620
45
      const uint16_t* start = &value_.ipv6[0];
621
      const uint16_t* compress_pointer =
622
45
          FindLongestZeroSequence(start, 8);
623
45
      bool ignore0 = false;
624
405
      for (int n = 0; n <= 7; n++) {
625
360
        const uint16_t* piece = &value_.ipv6[n];
626

360
        if (ignore0 && *piece == 0)
627
245
          continue;
628
156
        else if (ignore0)
629
35
          ignore0 = false;
630
156
        if (compress_pointer == piece) {
631
41
          dest += n == 0 ? "::" : ":";
632
41
          ignore0 = true;
633
41
          continue;
634
        }
635
        char buf[5];
636
115
        snprintf(buf, sizeof(buf), "%x", *piece);
637
115
        dest += buf;
638
115
        if (n < 7)
639
76
          dest += ':';
640
      }
641
45
      dest += ']';
642
45
      break;
643
    }
644
    case HostType::H_FAILED:
645
      break;
646
  }
647
125
  return dest;
648
}
649
650
4184
bool ParseHost(const std::string& input,
651
               std::string* output,
652
               bool is_special,
653
               bool unicode = false) {
654
4184
  if (input.empty()) {
655
94
    output->clear();
656
94
    return true;
657
  }
658
8180
  URLHost host;
659
4090
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
660
4090
  if (host.ParsingFailed())
661
489
    return false;
662
3601
  *output = host.ToStringMove();
663
3601
  return true;
664
}
665
666
5402
std::vector<std::string> FromJSStringArray(Environment* env,
667
                                           Local<Array> array) {
668
5402
  std::vector<std::string> vec;
669
5402
  if (array->Length() > 0)
670
5386
    vec.reserve(array->Length());
671
64454
  for (size_t n = 0; n < array->Length(); n++) {
672
53650
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
673
53650
    if (val->IsString()) {
674
26825
      Utf8Value value(env->isolate(), val.As<String>());
675
26825
      vec.emplace_back(*value, value.length());
676
    }
677
  }
678
5402
  return vec;
679
}
680
681
5402
// Builds a url_data from the properties of the JS object `base_obj`.
// `flags` and `port` are copied when they are Int32 values, `scheme` is
// always converted, the string members are copied when present as strings,
// and `path` is copied when it is an array. Property reads use
// ToLocalChecked().
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  // Reads a single property from `base_obj`.
  const auto get_property = [&](Local<Value> key) {
    return base_obj->Get(env->context(), key).ToLocalChecked();
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  Local<Value> scheme = get_property(env->scheme_string());
  base.scheme = Utf8Value(env->isolate(), scheme).out();

  // Copies the string property `name` into `base.*member`. `flag` is set
  // when the string is non-empty, or for any string when `empty_as_present`.
  const auto read_string = [&](std::string url_data::*member,
                               int flag,
                               Local<String> name,
                               bool empty_as_present) {
    Local<Value> value = get_property(name);
    if (!value->IsString())
      return;
    Local<String> text = value.As<String>();
    Utf8Value utf8value(env->isolate(), text);
    (base.*member).assign(*utf8value, utf8value.length());
    if (empty_as_present || text->Length() != 0) {
      base.flags |= flag;
    }
  };
  read_string(&url_data::username,
              URL_FLAGS_HAS_USERNAME,
              env->username_string(),
              false);
  read_string(&url_data::password,
              URL_FLAGS_HAS_PASSWORD,
              env->password_string(),
              false);
  read_string(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
  read_string(&url_data::query,
              URL_FLAGS_HAS_QUERY,
              env->query_string(),
              true);
  read_string(&url_data::fragment,
              URL_FLAGS_HAS_FRAGMENT,
              env->fragment_string(),
              true);

  Local<Value> path = get_property(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
735
736
39623
// Builds a url_data from the properties of the JS object `context_obj`.
// Only the flag bits in kCopyFlagsMask are copied. `username` and `password`
// are read only when the matching flag is set, and must then be strings.
// Property reads use ToLocalChecked().
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;

  // Reads a single property from `context_obj`.
  const auto get_property = [&](Local<Value> key) {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  // Stores the UTF-8 form of `value` in `*out`.
  const auto assign_utf8 = [&](Local<Value> value, std::string* out) {
    Utf8Value utf8(env->isolate(), value);
    out->assign(*utf8, utf8.length());
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = get_property(env->scheme_string());
  if (scheme->IsString())
    assign_utf8(scheme, &context.scheme);

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = get_property(env->username_string());
    CHECK(username->IsString());
    assign_utf8(username, &context.username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = get_property(env->password_string());
    CHECK(password->IsString());
    assign_utf8(password, &context.password);
  }

  Local<Value> host = get_property(env->host_string());
  if (host->IsString())
    assign_utf8(host, &context.host);

  return context;
}
784
785
// Single dot segment can be ".", "%2e", or "%2E"
bool IsSingleDotSegment(const std::string& str) {
  if (str.size() == 1)
    return str[0] == '.';
  if (str.size() != 3)
    return false;
  return str[0] == '%' &&
         str[1] == '2' &&
         (str[2] == 'e' || str[2] == 'E');
}
798
799
// Double dot segment can be:
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
// i.e. two dots, each written either literally or percent-encoded.
bool IsDoubleDotSegment(const std::string& str) {
  // True when a percent-encoded dot ("%2e" / "%2E") starts at `pos`. Callers
  // below only pass positions with at least three characters remaining.
  const auto encoded_dot_at = [&str](size_t pos) {
    return str[pos] == '%' &&
           str[pos + 1] == '2' &&
           (str[pos + 2] == 'e' || str[pos + 2] == 'E');
  };

  switch (str.size()) {
    case 2:
      return str[0] == '.' && str[1] == '.';
    case 4:
      return (str[0] == '.' && encoded_dot_at(1)) ||
             (encoded_dot_at(0) && str[3] == '.');
    case 6:
      return encoded_dot_at(0) && encoded_dot_at(3);
    default:
      return false;
  }
}
828
829
5499
void ShortenUrlPath(struct url_data* url) {
830
5499
  if (url->path.empty()) return;
831


5618
  if (url->path.size() == 1 && url->scheme == "file:" &&
832
440
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
833
5178
  url->path.pop_back();
834
}
835
836
}  // anonymous namespace
837
838
202554
// Runs the URL parsing state machine over the |len| bytes at |input| and
// records the result in |url|.
//
// - When |has_url| is false, leading and trailing C0-control/space
//   characters are trimmed from the input first. ASCII tabs and newlines are
//   always filtered out (a filtered copy is only allocated if one is found).
// - |state_override| selects the initial state; kUnknownState means "start at
//   kSchemeStart". A value outside [kSchemeStart, kFragment] sets
//   URL_FLAGS_INVALID_PARSE_STATE and returns immediately.
// - |base| is only consulted when |has_base| is true, with one exception: the
//   kRelative and kRelativeSlash states dereference it without re-checking.
// - Outcomes are reported through |url->flags|: URL_FLAGS_FAILED for a parse
//   failure, URL_FLAGS_TERMINATED when a state-override parse stops early
//   without applying the change, plus the URL_FLAGS_HAS_* bits for each
//   component that was filled in. The function itself returns nothing.
void URL::Parse(const char* input,
839
                size_t len,
840
                enum url_parse_state state_override,
841
                struct url_data* url,
842
                bool has_url,
843
                const struct url_data* base,
844
                bool has_base) {
845
202554
  const char* p = input;
846
202554
  const char* end = input + len;
847
848
202554
  if (!has_url) {
849
129233
    for (const char* ptr = p; ptr < end; ptr++) {
850
129214
      if (IsC0ControlOrSpace(*ptr))
851
56
        p++;
852
      else
853
129158
        break;
854
    }
855
129225
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
856
129206
      if (IsC0ControlOrSpace(*ptr))
857
48
        end--;
858
      else
859
129158
        break;
860
    }
861
129177
    input = p;
862
129177
    len = end - p;
863
  }
864
865
  // The spec says we should strip out any ASCII tabs or newlines.
866
  // In those cases, we create another std::string instance with the filtered
867
  // contents, but in the general case we avoid the overhead.
868
202554
  std::string whitespace_stripped;
869
12747653
  for (const char* ptr = p; ptr < end; ptr++) {
870
12545269
    if (!IsASCIITabOrNewline(*ptr))
871
12545099
      continue;
872
    // Hit tab or newline. Allocate storage, copy what we have until now,
873
    // and then iterate and filter all similar characters out.
874
170
    whitespace_stripped.reserve(len - 1);
875
170
    whitespace_stripped.assign(p, ptr - p);
876
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
877
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
878
853
      if (!IsASCIITabOrNewline(*ptr))
879
769
        whitespace_stripped += *ptr;
880
    }
881
882
    // Update variables like they should have looked like if the string
883
    // had been stripped of whitespace to begin with.
884
170
    input = whitespace_stripped.c_str();
885
170
    len = whitespace_stripped.size();
886
170
    p = input;
887
170
    end = input + len;
888
170
    break;
889
  }
890
891
202554
  bool atflag = false;  // Set when @ has been seen.
892
202554
  bool square_bracket_flag = false;  // Set inside of [...]
893
202554
  bool password_token_seen_flag = false;  // Set after a : after an username.
894
895
202554
  std::string buffer;
896
897
  // Set the initial parse state.
898
202554
  const bool has_state_override = state_override != kUnknownState;
899
202554
  enum url_parse_state state = has_state_override ? state_override :
900
                                                    kSchemeStart;
901
902

202554
  if (state < kSchemeStart || state > kFragment) {
903
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
904
    return;
905
  }
906
907
13200832
  while (p <= end) {
908
13006441
    const char ch = p < end ? p[0] : kEOL;
909
13006441
    bool special = (url->flags & URL_FLAGS_SPECIAL);
910
    bool cannot_be_base;
911

13006441
    bool special_back_slash = (special && ch == '\\');
912
913





13006441
    switch (state) {
914
129254
      case kSchemeStart:
915
129254
        if (IsASCIIAlpha(ch)) {
916
119351
          buffer += ASCIILowercase(ch);
917
119351
          state = kScheme;
918
9903
        } else if (!has_state_override) {
919
9893
          state = kNoScheme;
920
9893
          continue;
921
        } else {
922
10
          url->flags |= URL_FLAGS_FAILED;
923
10
          return;
924
        }
925
119351
        break;
926
485600
      case kScheme:
927


485600
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
928
366249
          buffer += ASCIILowercase(ch);
929

119351
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
930

118305
          if (has_state_override && buffer.size() == 0) {
931
            url->flags |= URL_FLAGS_TERMINATED;
932
            return;
933
          }
934
118305
          buffer += ':';
935
936
118305
          bool new_is_special = IsSpecial(buffer);
937
938
118305
          if (has_state_override) {
939
39
            if ((special != new_is_special) ||
940
39
                ((buffer == "file:") &&
941
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
942
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
943


104
                  (url->port != -1))) ||
944
39
                  (url->scheme == "file:" && url->host.empty())) {
945
32
              url->flags |= URL_FLAGS_TERMINATED;
946
32
              return;
947
            }
948
          }
949
950
118273
          url->scheme = std::move(buffer);
951
118273
          url->port = NormalizePort(url->scheme, url->port);
952
118273
          if (new_is_special) {
953
113731
            url->flags |= URL_FLAGS_SPECIAL;
954
113731
            special = true;
955
          } else {
956
4542
            url->flags &= ~URL_FLAGS_SPECIAL;
957
4542
            special = false;
958
          }
959

118273
          special_back_slash = (special && ch == '\\');
960
118273
          buffer.clear();
961
118273
          if (has_state_override)
962
27
            return;
963
118246
          if (url->scheme == "file:") {
964
110539
            state = kFile;
965
3181
          } else if (special &&
966

10888
                     has_base &&
967
1027
                     url->scheme == base->scheme) {
968
317
            state = kSpecialRelativeOrAuthority;
969
7390
          } else if (special) {
970
2864
            state = kSpecialAuthoritySlashes;
971

4526
          } else if (p + 1 < end && p[1] == '/') {
972
716
            state = kPathOrAuthority;
973
716
            p++;
974
          } else {
975
3810
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
976
3810
            url->flags |= URL_FLAGS_HAS_PATH;
977
3810
            url->path.emplace_back("");
978
3810
            state = kCannotBeBase;
979
118246
          }
980
1046
        } else if (!has_state_override) {
981
1038
          buffer.clear();
982
1038
          state = kNoScheme;
983
1038
          p = input;
984
1038
          continue;
985
        } else {
986
8
          url->flags |= URL_FLAGS_FAILED;
987
8
          return;
988
        }
989
484495
        break;
990
10931
      case kNoScheme:
991

10931
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
992

10931
        if (!has_base || (cannot_be_base && ch != '#')) {
993
7191
          url->flags |= URL_FLAGS_FAILED;
994
7191
          return;
995

3740
        } else if (cannot_be_base && ch == '#') {
996
28
          url->scheme = base->scheme;
997
28
          if (IsSpecial(url->scheme)) {
998
            url->flags |= URL_FLAGS_SPECIAL;
999
            special = true;
1000
          } else {
1001
28
            url->flags &= ~URL_FLAGS_SPECIAL;
1002
28
            special = false;
1003
          }
1004

28
          special_back_slash = (special && ch == '\\');
1005
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
1006
28
            url->flags |= URL_FLAGS_HAS_PATH;
1007
28
            url->path = base->path;
1008
          }
1009
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
1010
4
            url->flags |= URL_FLAGS_HAS_QUERY;
1011
4
            url->query = base->query;
1012
          }
1013
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1014
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1015
            url->fragment = base->fragment;
1016
          }
1017
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1018
28
          state = kFragment;
1019

7424
        } else if (has_base &&
1020
3712
                   base->scheme != "file:") {
1021
323
          state = kRelative;
1022
323
          continue;
1023
        } else {
1024
3389
          url->scheme = "file:";
1025
3389
          url->flags |= URL_FLAGS_SPECIAL;
1026
3389
          special = true;
1027
3389
          state = kFile;
1028

3389
          special_back_slash = (special && ch == '\\');
1029
3389
          continue;
1030
        }
1031
28
        break;
1032
317
      case kSpecialRelativeOrAuthority:
1033

317
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1034
285
          state = kSpecialAuthorityIgnoreSlashes;
1035
285
          p++;
1036
        } else {
1037
32
          state = kRelative;
1038
32
          continue;
1039
        }
1040
285
        break;
1041
716
      case kPathOrAuthority:
1042
716
        if (ch == '/') {
1043
548
          state = kAuthority;
1044
        } else {
1045
168
          state = kPath;
1046
168
          continue;
1047
        }
1048
548
        break;
1049
355
      case kRelative:
1050
355
        url->scheme = base->scheme;
1051
355
        if (IsSpecial(url->scheme)) {
1052
255
          url->flags |= URL_FLAGS_SPECIAL;
1053
255
          special = true;
1054
        } else {
1055
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1056
100
          special = false;
1057
        }
1058

355
        special_back_slash = (special && ch == '\\');
1059

355
        switch (ch) {
1060
18
          case kEOL:
1061
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1062
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1063
4
              url->username = base->username;
1064
            }
1065
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1066
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1067
4
              url->password = base->password;
1068
            }
1069
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1070
16
              url->flags |= URL_FLAGS_HAS_HOST;
1071
16
              url->host = base->host;
1072
            }
1073
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1074
              url->flags |= URL_FLAGS_HAS_QUERY;
1075
              url->query = base->query;
1076
            }
1077
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1078
18
              url->flags |= URL_FLAGS_HAS_PATH;
1079
18
              url->path = base->path;
1080
            }
1081
18
            url->port = base->port;
1082
18
            break;
1083
76
          case '/':
1084
76
            state = kRelativeSlash;
1085
76
            break;
1086
38
          case '?':
1087
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1088
              url->flags |= URL_FLAGS_HAS_USERNAME;
1089
              url->username = base->username;
1090
            }
1091
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1092
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1093
              url->password = base->password;
1094
            }
1095
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1096
34
              url->flags |= URL_FLAGS_HAS_HOST;
1097
34
              url->host = base->host;
1098
            }
1099
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1100
38
              url->flags |= URL_FLAGS_HAS_PATH;
1101
38
              url->path = base->path;
1102
            }
1103
38
            url->port = base->port;
1104
38
            state = kQuery;
1105
38
            break;
1106
38
          case '#':
1107
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1108
              url->flags |= URL_FLAGS_HAS_USERNAME;
1109
              url->username = base->username;
1110
            }
1111
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1112
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1113
              url->password = base->password;
1114
            }
1115
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1116
34
              url->flags |= URL_FLAGS_HAS_HOST;
1117
34
              url->host = base->host;
1118
            }
1119
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1120
              url->flags |= URL_FLAGS_HAS_QUERY;
1121
              url->query = base->query;
1122
            }
1123
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1124
38
              url->flags |= URL_FLAGS_HAS_PATH;
1125
38
              url->path = base->path;
1126
            }
1127
38
            url->port = base->port;
1128
38
            state = kFragment;
1129
38
            break;
1130
185
          default:
1131
185
            if (special_back_slash) {
1132
18
              state = kRelativeSlash;
1133
            } else {
1134
167
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1135
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1136
1
                url->username = base->username;
1137
              }
1138
167
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1139
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1140
1
                url->password = base->password;
1141
              }
1142
167
              if (base->flags & URL_FLAGS_HAS_HOST) {
1143
147
                url->flags |= URL_FLAGS_HAS_HOST;
1144
147
                url->host = base->host;
1145
              }
1146
167
              if (base->flags & URL_FLAGS_HAS_PATH) {
1147
167
                url->flags |= URL_FLAGS_HAS_PATH;
1148
167
                url->path = base->path;
1149
167
                ShortenUrlPath(url);
1150
              }
1151
167
              url->port = base->port;
1152
167
              state = kPath;
1153
167
              continue;
1154
            }
1155
        }
1156
188
        break;
1157
94
      case kRelativeSlash:
1158


94
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1159
22
          state = kSpecialAuthorityIgnoreSlashes;
1160
72
        } else if (ch == '/') {
1161
6
          state = kAuthority;
1162
        } else {
1163
66
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1164
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1165
8
            url->username = base->username;
1166
          }
1167
66
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1168
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1169
4
            url->password = base->password;
1170
          }
1171
66
          if (base->flags & URL_FLAGS_HAS_HOST) {
1172
58
            url->flags |= URL_FLAGS_HAS_HOST;
1173
58
            url->host = base->host;
1174
          }
1175
66
          url->port = base->port;
1176
66
          state = kPath;
1177
66
          continue;
1178
        }
1179
28
        break;
1180
2864
      case kSpecialAuthoritySlashes:
1181
2864
        state = kSpecialAuthorityIgnoreSlashes;
1182

2864
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1183
2715
          p++;
1184
        } else {
1185
149
          continue;
1186
        }
1187
2715
        break;
1188
3248
      case kSpecialAuthorityIgnoreSlashes:
1189

3248
        if (ch != '/' && ch != '\\') {
1190
3171
          state = kAuthority;
1191
3171
          continue;
1192
        }
1193
77
        break;
1194
83703
      case kAuthority:
1195
83703
        if (ch == '@') {
1196
563
          if (atflag) {
1197
41
            buffer.reserve(buffer.size() + 3);
1198
41
            buffer.insert(0, "%40");
1199
          }
1200
563
          atflag = true;
1201
563
          size_t blen = buffer.size();
1202

563
          if (blen > 0 && buffer[0] != ':') {
1203
467
            url->flags |= URL_FLAGS_HAS_USERNAME;
1204
          }
1205
6632
          for (size_t n = 0; n < blen; n++) {
1206
6069
            const char bch = buffer[n];
1207
6069
            if (bch == ':') {
1208
442
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1209
442
              if (!password_token_seen_flag) {
1210
426
                password_token_seen_flag = true;
1211
426
                continue;
1212
              }
1213
            }
1214
5643
            if (password_token_seen_flag) {
1215
2714
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1216
            } else {
1217
2929
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1218
            }
1219
          }
1220
563
          buffer.clear();
1221

83140
        } else if (ch == kEOL ||
1222
79485
                   ch == '/' ||
1223
79453
                   ch == '?' ||
1224
79435
                   ch == '#' ||
1225
                   special_back_slash) {
1226

3725
          if (atflag && buffer.size() == 0) {
1227
52
            url->flags |= URL_FLAGS_FAILED;
1228
52
            return;
1229
          }
1230
3673
          p -= buffer.size() + 1;
1231
3673
          buffer.clear();
1232
3673
          state = kHost;
1233
        } else {
1234
79415
          buffer += ch;
1235
        }
1236
83651
        break;
1237
78554
      case kHost:
1238
      case kHostname:
1239

78554
        if (has_state_override && url->scheme == "file:") {
1240
12
          state = kFileHost;
1241
12
          continue;
1242

78542
        } else if (ch == ':' && !square_bracket_flag) {
1243
1017
          if (buffer.size() == 0) {
1244
24
            url->flags |= URL_FLAGS_FAILED;
1245
24
            return;
1246
          }
1247
993
          if (state_override == kHostname) {
1248
4
            return;
1249
          }
1250
989
          url->flags |= URL_FLAGS_HAS_HOST;
1251
989
          if (!ParseHost(buffer, &url->host, special)) {
1252
5
            url->flags |= URL_FLAGS_FAILED;
1253
5
            return;
1254
          }
1255
984
          buffer.clear();
1256
984
          state = kPort;
1257

77525
        } else if (ch == kEOL ||
1258
74585
                   ch == '/' ||
1259
74545
                   ch == '?' ||
1260
74519
                   ch == '#' ||
1261
                   special_back_slash) {
1262
3030
          p--;
1263

3030
          if (special && buffer.size() == 0) {
1264
21
            url->flags |= URL_FLAGS_FAILED;
1265
21
            return;
1266
          }
1267
325
          if (has_state_override &&
1268

3372
              buffer.size() == 0 &&
1269
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1270
38
               url->port != -1)) {
1271
8
            url->flags |= URL_FLAGS_TERMINATED;
1272
8
            return;
1273
          }
1274
3001
          url->flags |= URL_FLAGS_HAS_HOST;
1275
3001
          if (!ParseHost(buffer, &url->host, special)) {
1276
432
            url->flags |= URL_FLAGS_FAILED;
1277
432
            return;
1278
          }
1279
2569
          buffer.clear();
1280
2569
          state = kPathStart;
1281
2569
          if (has_state_override) {
1282
221
            return;
1283
          }
1284
        } else {
1285
74495
          if (ch == '[')
1286
139
            square_bracket_flag = true;
1287
74495
          if (ch == ']')
1288
135
            square_bracket_flag = false;
1289
74495
          buffer += ch;
1290
        }
1291
77827
        break;
1292
5515
      case kPort:
1293
5515
        if (IsASCIIDigit(ch)) {
1294
4468
          buffer += ch;
1295

1047
        } else if (has_state_override ||
1296
544
                   ch == kEOL ||
1297
36
                   ch == '/' ||
1298
36
                   ch == '?' ||
1299
36
                   ch == '#' ||
1300
                   special_back_slash) {
1301
1011
          if (buffer.size() > 0) {
1302
997
            unsigned port = 0;
1303
            // the condition port <= 0xffff prevents integer overflow
1304

5249
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1305
4252
              port = port * 10 + buffer[i] - '0';
1306
997
            if (port > 0xffff) {
1307
              // TODO(TimothyGu): This hack is currently needed for the host
1308
              // setter since it needs access to hostname if it is valid, and
1309
              // if the FAILED flag is set the entire response to JS layer
1310
              // will be empty.
1311
26
              if (state_override == kHost)
1312
2
                url->port = -1;
1313
              else
1314
24
                url->flags |= URL_FLAGS_FAILED;
1315
26
              return;
1316
            }
1317
            // the port is valid
1318
971
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1319
971
            if (url->port == -1)
1320
47
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1321
971
            buffer.clear();
1322
14
          } else if (has_state_override) {
1323
            // TODO(TimothyGu): Similar case as above.
1324
6
            if (state_override == kHost)
1325
2
              url->port = -1;
1326
            else
1327
4
              url->flags |= URL_FLAGS_TERMINATED;
1328
6
            return;
1329
          }
1330
979
          state = kPathStart;
1331
979
          continue;
1332
        } else {
1333
36
          url->flags |= URL_FLAGS_FAILED;
1334
36
          return;
1335
        }
1336
4468
        break;
1337
113928
      case kFile:
1338
113928
        url->scheme = "file:";
1339
113928
        url->host.clear();
1340
113928
        url->flags |= URL_FLAGS_HAS_HOST;
1341

113928
        if (ch == '/' || ch == '\\') {
1342
110657
          state = kFileSlash;
1343

3271
        } else if (has_base && base->scheme == "file:") {
1344

3252
          switch (ch) {
1345
4
            case kEOL:
1346
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1347
4
                url->host = base->host;
1348
              }
1349
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1350
4
                url->flags |= URL_FLAGS_HAS_PATH;
1351
4
                url->path = base->path;
1352
              }
1353
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1354
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1355
4
                url->query = base->query;
1356
              }
1357
4
              break;
1358
4
            case '?':
1359
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1360
4
                url->host = base->host;
1361
              }
1362
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1363
4
                url->flags |= URL_FLAGS_HAS_PATH;
1364
4
                url->path = base->path;
1365
              }
1366
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1367
4
              url->query.clear();
1368
4
              state = kQuery;
1369
4
              break;
1370
4
            case '#':
1371
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1372
4
                url->host = base->host;
1373
              }
1374
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1375
4
                url->flags |= URL_FLAGS_HAS_PATH;
1376
4
                url->path = base->path;
1377
              }
1378
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1379
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1380
4
                url->query = base->query;
1381
              }
1382
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1383
4
              url->fragment.clear();
1384
4
              state = kFragment;
1385
4
              break;
1386
3240
            default:
1387
3240
              url->query.clear();
1388
3240
              if (base->flags & URL_FLAGS_HAS_HOST) {
1389
3240
                url->host = base->host;
1390
              }
1391
3240
              if (base->flags & URL_FLAGS_HAS_PATH) {
1392
3240
                url->flags |= URL_FLAGS_HAS_PATH;
1393
3240
                url->path = base->path;
1394
              }
1395
3240
              if (!StartsWithWindowsDriveLetter(p, end)) {
1396
3216
                ShortenUrlPath(url);
1397
              } else {
1398
24
                url->path.clear();
1399
              }
1400
3240
              state = kPath;
1401
3240
              continue;
1402
          }
1403
        } else {
1404
19
          state = kPath;
1405
19
          continue;
1406
        }
1407
110669
        break;
1408
110657
      case kFileSlash:
1409

110657
        if (ch == '/' || ch == '\\') {
1410
110515
          state = kFileHost;
1411
        } else {
1412

142
          if (has_base && base->scheme == "file:") {
1413
128
            url->flags |= URL_FLAGS_HAS_HOST;
1414
128
            url->host = base->host;
1415

242
            if (!StartsWithWindowsDriveLetter(p, end) &&
1416
114
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1417
4
              url->flags |= URL_FLAGS_HAS_PATH;
1418
4
              url->path.push_back(base->path[0]);
1419
            }
1420
          }
1421
142
          state = kPath;
1422
142
          continue;
1423
        }
1424
110515
        break;
1425
111635
      case kFileHost:
1426

111635
        if (ch == kEOL ||
1427
1118
            ch == '/' ||
1428
1108
            ch == '\\' ||
1429
1108
            ch == '?' ||
1430
            ch == '#') {
1431
110515
          if (!has_state_override &&
1432

221042
              buffer.size() == 2 &&
1433
22
              IsWindowsDriveLetter(buffer)) {
1434
12
            state = kPath;
1435
110515
          } else if (buffer.size() == 0) {
1436
110321
            url->flags |= URL_FLAGS_HAS_HOST;
1437
110321
            url->host.clear();
1438
110321
            if (has_state_override)
1439
4
              return;
1440
110317
            state = kPathStart;
1441
          } else {
1442
194
            std::string host;
1443
194
            if (!ParseHost(buffer, &host, special)) {
1444
52
              url->flags |= URL_FLAGS_FAILED;
1445
52
              return;
1446
            }
1447
142
            if (host == "localhost")
1448
37
              host.clear();
1449
142
            url->flags |= URL_FLAGS_HAS_HOST;
1450
142
            url->host = host;
1451
142
            if (has_state_override)
1452
4
              return;
1453
138
            buffer.clear();
1454
138
            state = kPathStart;
1455
          }
1456
110467
          continue;
1457
        } else {
1458
1108
          buffer += ch;
1459
        }
1460
1108
        break;
1461
186412
      case kPathStart:
1462
186412
        if (IsSpecial(url->scheme)) {
1463
185864
          state = kPath;
1464

185864
          if (ch != '/' && ch != '\\') {
1465
73346
            continue;
1466
          }
1467

548
        } else if (!has_state_override && ch == '?') {
1468
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1469
6
          url->query.clear();
1470
6
          state = kQuery;
1471

542
        } else if (!has_state_override && ch == '#') {
1472
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1473
6
          url->fragment.clear();
1474
6
          state = kFragment;
1475
536
        } else if (ch != kEOL) {
1476
459
          state = kPath;
1477
459
          if (ch != '/') {
1478
35
            continue;
1479
          }
1480

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1481
2
          url->flags |= URL_FLAGS_HAS_PATH;
1482
2
          url->path.emplace_back("");
1483
        }
1484
113031
        break;
1485
11635797
      case kPath:
1486

11635797
        if (ch == kEOL ||
1487
10511115
            ch == '/' ||
1488
10511045
            special_back_slash ||
1489

10511045
            (!has_state_override && (ch == '?' || ch == '#'))) {
1490
1125273
          if (IsDoubleDotSegment(buffer)) {
1491
2116
            ShortenUrlPath(url);
1492

2116
            if (ch != '/' && !special_back_slash) {
1493
269
              url->flags |= URL_FLAGS_HAS_PATH;
1494
269
              url->path.emplace_back("");
1495
            }
1496
1124499
          } else if (IsSingleDotSegment(buffer) &&
1497

1124499
                     ch != '/' && !special_back_slash) {
1498
299
            url->flags |= URL_FLAGS_HAS_PATH;
1499
299
            url->path.emplace_back("");
1500
1122858
          } else if (!IsSingleDotSegment(buffer)) {
1501
2238741
            if (url->scheme == "file:" &&
1502
1266678
                url->path.empty() &&
1503

2388493
                buffer.size() == 2 &&
1504
100
                IsWindowsDriveLetter(buffer)) {
1505
98
              buffer[1] = ':';
1506
            }
1507
1121815
            url->flags |= URL_FLAGS_HAS_PATH;
1508
1121815
            url->path.emplace_back(std::move(buffer));
1509
          }
1510
1125273
          buffer.clear();
1511
2250546
          if (ch == '?') {
1512
480
            url->flags |= URL_FLAGS_HAS_QUERY;
1513
480
            url->query.clear();
1514
480
            state = kQuery;
1515
1124793
          } else if (ch == '#') {
1516
41
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1517
41
            url->fragment.clear();
1518
41
            state = kFragment;
1519
          }
1520
        } else {
1521
10510524
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1522
        }
1523
11635797
        break;
1524
36846
      case kCannotBeBase:
1525
36846
        switch (ch) {
1526
4
          case '?':
1527
4
            state = kQuery;
1528
4
            break;
1529
10
          case '#':
1530
10
            state = kFragment;
1531
10
            break;
1532
36832
          default:
1533
36832
            if (url->path.empty())
1534
              url->path.emplace_back("");
1535
36832
            else if (ch != kEOL)
1536
33036
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1537
        }
1538
36846
        break;
1539
5918
      case kQuery:
1540

5918
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1541
685
          url->flags |= URL_FLAGS_HAS_QUERY;
1542
685
          url->query = std::move(buffer);
1543
685
          buffer.clear();
1544
1060
          if (ch == '#')
1545
375
            state = kFragment;
1546
        } else {
1547
5233
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1548
                                                QUERY_ENCODE_SET_NONSPECIAL);
1549
        }
1550
5918
        break;
1551
4097
      case kFragment:
1552
4097
        switch (ch) {
1553
570
          case kEOL:
1554
570
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1555
570
            url->fragment = std::move(buffer);
1556
570
            break;
1557
3527
          default:
1558
3527
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1559
        }
1560
4097
        break;
1561
      default:
1562
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1563
        return;
1564
    }
1565
1566
12791642
    p++;
1567
  }
1568
}  // NOLINT(readability/fn_size)
1569
1570
// https://url.spec.whatwg.org/#url-serializing
1571
33754
std::string URL::SerializeURL(const struct url_data* url,
1572
                              bool exclude = false) {
1573
33754
  std::string output = url->scheme;
1574
33754
  if (url->flags & URL_FLAGS_HAS_HOST) {
1575
33754
    output += "//";
1576
33754
    if (url->flags & URL_FLAGS_HAS_USERNAME ||
1577
33754
        url->flags & URL_FLAGS_HAS_PASSWORD) {
1578
      if (url->flags & URL_FLAGS_HAS_USERNAME) {
1579
        output += url->username;
1580
      }
1581
      if (url->flags & URL_FLAGS_HAS_PASSWORD) {
1582
        output += ":" + url->password;
1583
      }
1584
      output += "@";
1585
    }
1586
33754
    output += url->host;
1587
33754
    if (url->port != -1) {
1588
      output += ":" + std::to_string(url->port);
1589
    }
1590
  }
1591
33754
  if (url->flags & URL_FLAGS_CANNOT_BE_BASE) {
1592
    output += url->path[0];
1593
  } else {
1594
    if (!(url->flags & URL_FLAGS_HAS_HOST) &&
1595

33754
          url->path.size() > 1 &&
1596
          url->path[0].empty()) {
1597
      output += "/.";
1598
    }
1599
358237
    for (size_t i = 1; i < url->path.size(); i++) {
1600
324483
      output += "/" + url->path[i];
1601
    }
1602
  }
1603
33754
  if (url->flags & URL_FLAGS_HAS_QUERY) {
1604
    output = "?" + url->query;
1605
  }
1606

33754
  if (!exclude && url->flags & URL_FLAGS_HAS_FRAGMENT) {
1607
    output = "#" + url->fragment;
1608
  }
1609
33754
  return output;
1610
}
1611
1612
namespace {
1613
127122
void SetArgs(Environment* env,
1614
             Local<Value> argv[ARG_COUNT],
1615
             const struct url_data& url) {
1616
127122
  Isolate* isolate = env->isolate();
1617
127122
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1618
254244
  argv[ARG_PROTOCOL] =
1619
127122
      url.flags & URL_FLAGS_SPECIAL ?
1620
122355
          GetSpecial(env, url.scheme) :
1621
4767
          OneByteString(isolate, url.scheme.c_str());
1622
127122
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1623
1220
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1624
127122
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1625
1180
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1626
127122
  if (url.flags & URL_FLAGS_HAS_HOST)
1627
246102
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1628
127122
  if (url.flags & URL_FLAGS_HAS_QUERY)
1629
1386
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1630
127122
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1631
1132
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1632
127122
  if (url.port > -1)
1633
2186
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1634
127122
  if (url.flags & URL_FLAGS_HAS_PATH)
1635
253100
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1636
127122
}
1637
1638
135017
void Parse(Environment* env,
1639
           Local<Value> recv,
1640
           const char* input,
1641
           size_t len,
1642
           enum url_parse_state state_override,
1643
           Local<Value> base_obj,
1644
           Local<Value> context_obj,
1645
           Local<Function> cb,
1646
           Local<Value> error_cb) {
1647
135017
  Isolate* isolate = env->isolate();
1648
135017
  Local<Context> context = env->context();
1649
135017
  HandleScope handle_scope(isolate);
1650
135017
  Context::Scope context_scope(context);
1651
1652
135017
  const bool has_context = context_obj->IsObject();
1653
135017
  const bool has_base = base_obj->IsObject();
1654
1655
135017
  url_data base;
1656
135017
  url_data url;
1657
135017
  if (has_context)
1658
39623
    url = HarvestContext(env, context_obj.As<Object>());
1659
135017
  if (has_base)
1660
5402
    base = HarvestBase(env, base_obj.As<Object>());
1661
1662
135017
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1663

135017
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1664
39623
      ((state_override != kUnknownState) &&
1665
39623
       (url.flags & URL_FLAGS_TERMINATED)))
1666
44
    return;
1667
1668
  // Define the return value placeholders
1669
134973
  const Local<Value> undef = Undefined(isolate);
1670
134973
  const Local<Value> null = Null(isolate);
1671
134973
  if (!(url.flags & URL_FLAGS_FAILED)) {
1672
    Local<Value> argv[] = {
1673
      undef,
1674
      undef,
1675
      undef,
1676
      undef,
1677
      null,  // host defaults to null
1678
      null,  // port defaults to null
1679
      undef,
1680
      null,  // query defaults to null
1681
      null,  // fragment defaults to null
1682
127122
    };
1683
127122
    SetArgs(env, argv, url);
1684
254244
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
1685
7851
  } else if (error_cb->IsFunction()) {
1686
7721
    Local<Value> argv[2] = { undef, undef };
1687
7721
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1688
7721
    argv[ERR_ARG_INPUT] =
1689
15442
      String::NewFromUtf8(env->isolate(), input).ToLocalChecked();
1690
7721
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
1691
7721
        .FromMaybe(Local<Value>());
1692
  }
1693
}
1694
1695
135017
void Parse(const FunctionCallbackInfo<Value>& args) {
1696
135017
  Environment* env = Environment::GetCurrent(args);
1697
135017
  CHECK_GE(args.Length(), 5);
1698
270034
  CHECK(args[0]->IsString());  // input
1699


365486
  CHECK(args[2]->IsUndefined() ||  // base context
1700
        args[2]->IsNull() ||
1701
        args[2]->IsObject());
1702


388903
  CHECK(args[3]->IsUndefined() ||  // context
1703
        args[3]->IsNull() ||
1704
        args[3]->IsObject());
1705
135017
  CHECK(args[4]->IsFunction());  // complete callback
1706

365428
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1707
1708
135017
  Utf8Value input(env->isolate(), args[0]);
1709
135017
  enum url_parse_state state_override = kUnknownState;
1710
135017
  if (args[1]->IsNumber()) {
1711
135017
    state_override = static_cast<enum url_parse_state>(
1712
270034
        args[1]->Uint32Value(env->context()).FromJust());
1713
  }
1714
1715
270034
  Parse(env, args.This(),
1716
135017
        *input, input.length(),
1717
        state_override,
1718
        args[2],
1719
        args[3],
1720
270034
        args[4].As<Function>(),
1721
        args[5]);
1722
135017
}
1723
1724
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1725
92
  Environment* env = Environment::GetCurrent(args);
1726
92
  CHECK_GE(args.Length(), 1);
1727
184
  CHECK(args[0]->IsString());
1728
184
  Utf8Value value(env->isolate(), args[0]);
1729
92
  std::string output;
1730
92
  size_t len = value.length();
1731
92
  output.reserve(len);
1732
756
  for (size_t n = 0; n < len; n++) {
1733
664
    const char ch = (*value)[n];
1734
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1735
  }
1736
276
  args.GetReturnValue().Set(
1737
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1738
92
}
1739
1740
17
void ToUSVString(const FunctionCallbackInfo<Value>& args) {
1741
17
  Environment* env = Environment::GetCurrent(args);
1742
17
  CHECK_GE(args.Length(), 2);
1743
34
  CHECK(args[0]->IsString());
1744
17
  CHECK(args[1]->IsNumber());
1745
1746
17
  TwoByteValue value(env->isolate(), args[0]);
1747
1748
17
  int64_t start = args[1]->IntegerValue(env->context()).FromJust();
1749
17
  CHECK_GE(start, 0);
1750
1751
49
  for (size_t i = start; i < value.length(); i++) {
1752
32
    char16_t c = value[i];
1753
32
    if (!IsUnicodeSurrogate(c)) {
1754
13
      continue;
1755

19
    } else if (IsUnicodeSurrogateTrail(c) || i == value.length() - 1) {
1756
16
      value[i] = kUnicodeReplacementCharacter;
1757
    } else {
1758
3
      char16_t d = value[i + 1];
1759
3
      if (IsUnicodeTrail(d)) {
1760
        i++;
1761
      } else {
1762
3
        value[i] = kUnicodeReplacementCharacter;
1763
      }
1764
    }
1765
  }
1766
1767
51
  args.GetReturnValue().Set(
1768
17
      String::NewFromTwoByte(env->isolate(),
1769
17
                             *value,
1770
                             NewStringType::kNormal,
1771
17
                             value.length()).ToLocalChecked());
1772
17
}
1773
1774
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1775
229
  Environment* env = Environment::GetCurrent(args);
1776
229
  CHECK_GE(args.Length(), 1);
1777
458
  CHECK(args[0]->IsString());
1778
229
  Utf8Value value(env->isolate(), args[0]);
1779
1780
229
  URLHost host;
1781
  // Assuming the host is used for a special scheme.
1782
229
  host.ParseHost(*value, value.length(), true);
1783
229
  if (host.ParsingFailed()) {
1784
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1785
12
    return;
1786
  }
1787
217
  std::string out = host.ToStringMove();
1788
651
  args.GetReturnValue().Set(
1789
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1790
}
1791
1792
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1793
207
  Environment* env = Environment::GetCurrent(args);
1794
207
  CHECK_GE(args.Length(), 1);
1795
414
  CHECK(args[0]->IsString());
1796
207
  Utf8Value value(env->isolate(), args[0]);
1797
1798
207
  URLHost host;
1799
  // Assuming the host is used for a special scheme.
1800
207
  host.ParseHost(*value, value.length(), true, true);
1801
207
  if (host.ParsingFailed()) {
1802
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1803
12
    return;
1804
  }
1805
195
  std::string out = host.ToStringMove();
1806
585
  args.GetReturnValue().Set(
1807
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1808
}
1809
1810
604
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1811
604
  Environment* env = Environment::GetCurrent(args);
1812
604
  CHECK_EQ(args.Length(), 1);
1813
604
  CHECK(args[0]->IsFunction());
1814
1208
  env->set_url_constructor_function(args[0].As<Function>());
1815
604
}
1816
1817
604
void Initialize(Local<Object> target,
1818
                Local<Value> unused,
1819
                Local<Context> context,
1820
                void* priv) {
1821
604
  Environment* env = Environment::GetCurrent(context);
1822
604
  env->SetMethod(target, "parse", Parse);
1823
604
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1824
604
  env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString);
1825
604
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1826
604
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1827
604
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1828
1829
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1830
16308
  FLAGS(XX)
1831
#undef XX
1832
1833
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1834
25368
  PARSESTATES(XX)
1835
#undef XX
1836
604
}
1837
}  // namespace
1838
1839
4845
// Registers every native callback of this file with `registry`, in a fixed
// order.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  // The explicit element type selects the FunctionCallbackInfo overload of
  // Parse.
  const v8::FunctionCallback callbacks[] = {
    Parse,
    EncodeAuthSet,
    ToUSVString,
    DomainToASCII,
    DomainToUnicode,
    SetURLConstructor,
  };
  for (const v8::FunctionCallback callback : callbacks) {
    registry->Register(callback);
  }
}
1847
1848
8
std::string URL::ToFilePath() const {
1849
8
  if (context_.scheme != "file:") {
1850
1
    return "";
1851
  }
1852
1853
#ifdef _WIN32
1854
  const char* slash = "\\";
1855
  auto is_slash = [] (char ch) {
1856
    return ch == '/' || ch == '\\';
1857
  };
1858
#else
1859
7
  const char* slash = "/";
1860
46
  auto is_slash = [] (char ch) {
1861
46
    return ch == '/';
1862
  };
1863

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1864
7
      context_.host.length() > 0) {
1865
1
    return "";
1866
  }
1867
#endif
1868
12
  std::string decoded_path;
1869
18
  for (const std::string& part : context_.path) {
1870
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1871
58
    for (char& ch : decoded) {
1872
46
      if (is_slash(ch)) {
1873
1
        return "";
1874
      }
1875
    }
1876
12
    decoded_path += slash + decoded;
1877
  }
1878
1879
#ifdef _WIN32
1880
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1881
1882
  // If hostname is set, then we have a UNC path. Pass the hostname through
1883
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1884
  // need to worry about percent encoding because the URL parser will have
1885
  // already taken care of that for us. Note that this only causes IDNs with an
1886
  // appropriate `xn--` prefix to be decoded.
1887
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1888
      context_.host.length() > 0) {
1889
    std::string unicode_host;
1890
    if (!ToUnicode(context_.host, &unicode_host)) {
1891
      return "";
1892
    }
1893
    return "\\\\" + unicode_host + decoded_path;
1894
  }
1895
  // Otherwise, it's a local path that requires a drive letter.
1896
  if (decoded_path.length() < 3) {
1897
    return "";
1898
  }
1899
  if (decoded_path[2] != ':' ||
1900
      !IsASCIIAlpha(decoded_path[1])) {
1901
    return "";
1902
  }
1903
  // Strip out the leading '\'.
1904
  return decoded_path.substr(1);
1905
#else
1906
5
  return decoded_path;
1907
#endif
1908
}
1909
1910
33754
// Builds a "file://" URL whose path comes from `file_path`. Every '%' in the
// input is written as "%25" before the path is handed to the parser.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped_file_path;
  for (const char ch : file_path) {
    if (ch == '%') {
      escaped_file_path += "%25";
    } else {
      escaped_file_path += ch;
    }
  }

  // Continue parsing on top of the "file://" record, starting in the
  // path-start state and without a base URL.
  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1922
1923
// This function works by calling out to a JS function that creates and
1924
// returns the JS URL object. Be mindful of the JS<->Native boundary
1925
// crossing that is required.
1926
MaybeLocal<Value> URL::ToObject(Environment* env) const {
1927
  Isolate* isolate = env->isolate();
1928
  Local<Context> context = env->context();
1929
  Context::Scope context_scope(context);
1930
1931
  const Local<Value> undef = Undefined(isolate);
1932
  const Local<Value> null = Null(isolate);
1933
1934
  if (context_.flags & URL_FLAGS_FAILED)
1935
    return Local<Value>();
1936
1937
  Local<Value> argv[] = {
1938
    undef,
1939
    undef,
1940
    undef,
1941
    undef,
1942
    null,  // host defaults to null
1943
    null,  // port defaults to null
1944
    undef,
1945
    null,  // query defaults to null
1946
    null,  // fragment defaults to null
1947
  };
1948
  SetArgs(env, argv, context_);
1949
1950
  MaybeLocal<Value> ret;
1951
  {
1952
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
1953
1954
    // The SetURLConstructor method must have been called already to
1955
    // set the constructor function used below. SetURLConstructor is
1956
    // called automatically when the internal/url.js module is loaded
1957
    // during the internal/bootstrap/node.js processing.
1958
    ret = env->url_constructor_function()
1959
        ->Call(env->context(), undef, arraysize(argv), argv);
1960
  }
1961
1962
  return ret;
1963
}
1964
1965
}  // namespace url
1966
}  // namespace node
1967
1968
4918
// NOTE(review): presumably registers the internal "url" binding with
// node::url::Initialize as its setup function - confirm against the macro.
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1969
4845
// NOTE(review): presumably hooks node::url::RegisterExternalReferences into
// the external reference registration of the "url" binding - confirm against
// the macro.
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)