GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1129 1187 95.1 %
Date: 2022-05-03 04:14:50 Branches: 970 1096 88.5 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <numeric>
11
#include <string>
12
#include <vector>
13
14
namespace node {
15
16
using errors::TryCatchScope;
17
18
using url::table_data::hex;
19
using url::table_data::C0_CONTROL_ENCODE_SET;
20
using url::table_data::FRAGMENT_ENCODE_SET;
21
using url::table_data::PATH_ENCODE_SET;
22
using url::table_data::USERINFO_ENCODE_SET;
23
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
24
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
25
26
using v8::Array;
27
using v8::Context;
28
using v8::Function;
29
using v8::FunctionCallbackInfo;
30
using v8::HandleScope;
31
using v8::Int32;
32
using v8::Integer;
33
using v8::Isolate;
34
using v8::Local;
35
using v8::MaybeLocal;
36
using v8::NewStringType;
37
using v8::Null;
38
using v8::Object;
39
using v8::String;
40
using v8::Undefined;
41
using v8::Value;
42
43
143012
// Builds a V8 string from the UTF-8 bytes held in `str`.
// The result is unwrapped with ToLocalChecked(), so callers always receive
// a usable handle.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string =
      String::NewFromUtf8(isolate,
                          str.data(),
                          NewStringType::kNormal,
                          str.length());
  return maybe_string.ToLocalChecked();
}
49
50
namespace url {
51
namespace {
52
53
// https://url.spec.whatwg.org/#eof-code-point
54
// Sentinel character value used in place of a real input character once a
// read pointer has reached the end of the input.
constexpr char kEOL = -1;
55
56
// https://url.spec.whatwg.org/#concept-host
57
// Result of parsing a host string. Exactly one representation is active at a
// time, selected by `type_`: a domain/opaque string, a 32-bit IPv4 address,
// or eight 16-bit IPv6 pieces. A freshly constructed (or failed) host has
// type H_FAILED.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no Parse*() call has produced a valid host.
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }

  // Serializes the host; returns an empty string for a failed host.
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage shared by all host representations. The string member is only
  // alive while `type_` is H_DOMAIN or H_OPAQUE; its lifetime is managed
  // manually by Reset() and EmplaceString().
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    Value() : ipv4(0) {}
    ~Value() {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the active one and marks the host
  // as failed/empty.
  void Reset() {
    if (type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE) {
      using string = std::string;
      value_.domain_or_opaque.~string();
    }
    type_ = HostType::H_FAILED;
  }

  // Plain assignment to the string member of the union would be brittle: it
  // requires the member to already be a live, destructible std::string.
  // Instead the previous state is torn down with Reset() and the new string
  // is constructed in place. (Not forcing strings into a union at all might
  // be the cleaner long-term option.)
  void EmplaceString(HostType type, std::string&& source) {
    Reset();
    type_ = type;
    new(&value_.domain_or_opaque) std::string(std::move(source));
  }

  void SetOpaque(std::string&& string) {
    EmplaceString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    EmplaceString(HostType::H_DOMAIN, std::move(string));
  }
};
128
129
5299
// Tears the host down through Reset(), which destroys the string member of
// the union when it is the active one.
URLHost::~URLHost() {
130
5299
  Reset();
131
5299
}
132
133
// X-macro listing the argument slots, in order. It is expanded with a
// caller-supplied XX(name) to generate one item per slot; ARG_COUNT comes
// last so that it equals the number of real slots.
#define ARGS(XX)                                                              \
134
  XX(ARG_FLAGS)                                                               \
135
  XX(ARG_PROTOCOL)                                                            \
136
  XX(ARG_USERNAME)                                                            \
137
  XX(ARG_PASSWORD)                                                            \
138
  XX(ARG_HOST)                                                                \
139
  XX(ARG_PORT)                                                                \
140
  XX(ARG_PATH)                                                                \
141
  XX(ARG_QUERY)                                                               \
142
  XX(ARG_FRAGMENT)                                                            \
143
  XX(ARG_COUNT)  // This one has to be last.
144
145
// Enumerators generated from the ARGS list: each ARGS entry becomes one
// enumerator, numbered consecutively from 0 in list order.
enum url_cb_args {
146
#define XX(name) name,
147
  ARGS(XX)
148
#undef XX
149
};
150
151
// Generates two overloads of the predicate `name`:
//  - name(ch1, ch2): evaluates `expr`, which may refer to `ch1` and `ch2`;
//  - name(str): true when `str` has at least two characters and the first
//    two satisfy the predicate above.
// Both overloads static_assert that the character type is at least
// `bits` / 8 bytes in size.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
152
  template <typename T>                                                       \
153
  bool name(const T ch1, const T ch2) {                                \
154
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
155
                  "Character must be wider than " #bits " bits");             \
156
    return (expr);                                                            \
157
  }                                                                           \
158
  template <typename T>                                                       \
159
  bool name(const std::basic_string<T>& str) {                         \
160
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
161
                  "Character must be wider than " #bits " bits");             \
162
    return str.length() >= 2 && name(str[0], str[1]);                         \
163
  }
164
165
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
166

13911470
// Matches line feed, carriage return and horizontal tab.
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\n' || ch == '\r' || ch == '\t'))

// https://infra.spec.whatwg.org/#c0-control-or-space
// Matches every character from NUL up to and including the space.
CHAR_TEST(8, IsC0ControlOrSpace, (ch <= ' ' && ch >= '\0'))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, ('0' <= ch && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
// Accepts decimal digits plus a-f in either case.
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               ('a' <= ch && ch <= 'f') ||
                               ('A' <= ch && ch <= 'F')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, (('a' <= ch && ch <= 'z') ||
                            ('A' <= ch && ch <= 'Z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIAlpha(ch) || IsASCIIDigit(ch)))
185
186
// https://infra.spec.whatwg.org/#ascii-lowercase
187
// Maps an ASCII letter to its lowercase form by setting bit 0x20; every
// other value is returned unchanged.
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}
191
192
// https://url.spec.whatwg.org/#forbidden-host-code-point
193









91197
// True for any character in the forbidden set below (listed in ascending
// code point order).
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '<' || ch == '>' || ch == '?' ||
          ch == '@' || ch == '[' || ch == '\\' || ch == ']' ||
          ch == '^' || ch == '|')
199
200
// https://url.spec.whatwg.org/#windows-drive-letter
201

12610
// An ASCII letter followed by either ':' or '|'.
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     ((ch2 == ':' || ch2 == '|') && IsASCIIAlpha(ch1)))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
// An ASCII letter followed by ':' only.
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (ch2 == ':' && IsASCIIAlpha(ch1)))
207
208
#undef TWO_CHAR_STRING_TEST
209
210
11712791
// Tests bit `i` of the bitmap `a`. Bit i lives in byte i / 8, with the
// least significant bit of each byte holding the lowest index.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const unsigned byte_index = i / 8u;
  const unsigned mask = 1u << (i % 8u);
  return (a[byte_index] & mask) != 0;
}
213
214
// Appends ch to str. If ch position in encode_set is set, the ch will
215
// be percent-encoded then appended.
216
11712791
void AppendOrEscape(std::string* str,
217
                    const unsigned char ch,
218
                    const uint8_t encode_set[]) {
219
11712791
  if (BitAt(encode_set, ch))
220
1921
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
221
  else
222
11710870
    *str += ch;
223
11712791
}
224
225
850
unsigned hex2bin(const char ch) {
226

850
  if (ch >= '0' && ch <= '9')
227
546
    return ch - '0';
228

304
  if (ch >= 'A' && ch <= 'F')
229
172
    return 10 + (ch - 'A');
230

132
  if (ch >= 'a' && ch <= 'f')
231
132
    return 10 + (ch - 'a');
232
  UNREACHABLE();
233
}
234
235
4433
std::string PercentDecode(const char* input, size_t len) {
236
4433
  std::string dest;
237
4433
  if (len == 0)
238
2
    return dest;
239
4431
  dest.reserve(len);
240
4431
  const char* pointer = input;
241
4431
  const char* end = input + len;
242
243
94178
  while (pointer < end) {
244
89747
    const char ch = pointer[0];
245
89747
    size_t remaining = end - pointer - 1;
246


90184
    if (ch != '%' || remaining < 2 ||
247
437
        (ch == '%' &&
248
437
         (!IsASCIIHexDigit(pointer[1]) ||
249
433
          !IsASCIIHexDigit(pointer[2])))) {
250
89322
      dest += ch;
251
89322
      pointer++;
252
89322
      continue;
253
    } else {
254
425
      unsigned a = hex2bin(pointer[1]);
255
425
      unsigned b = hex2bin(pointer[2]);
256
425
      char c = static_cast<char>(a * 16 + b);
257
425
      dest += c;
258
425
      pointer += 3;
259
    }
260
  }
261
4431
  return dest;
262
}
263
264
// X-macro table of the special schemes. Each entry is
// XX(identifier, port, scheme string including the trailing ':').
// The port is the value NormalizePort() maps to -1 for that scheme.
#define SPECIALS(XX)                                                          \
265
  XX(ftp, 21, "ftp:")                                                         \
266
  XX(file, -1, "file:")                                                       \
267
  XX(http, 80, "http:")                                                       \
268
  XX(https, 443, "https:")                                                    \
269
  XX(ws, 80, "ws:")                                                           \
270
  XX(wss, 443, "wss:")
271
272
333539
bool IsSpecial(const std::string& scheme) {
273
#define V(_, __, name) if (scheme == name) return true;
274



333539
  SPECIALS(V);
275
#undef V
276
6460
  return false;
277
}
278
279
139349
// Returns the per-environment string for a special scheme. `scheme` must be
// one of the SPECIALS entries; anything else hits UNREACHABLE().
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define V(key, _, name)                                                       \
  if (scheme == name) {                                                       \
    return env->url_special_##key##_string();                                 \
  }
  SPECIALS(V)
#undef V
  UNREACHABLE();
}
286
287
132829
int NormalizePort(const std::string& scheme, int p) {
288
#define V(_, port, name) if (scheme == name && p == port) return -1;
289









132829
  SPECIALS(V);
290
#undef V
291
11272
  return p;
292
}
293
294
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
295
6764
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
296
6764
  size_t length = end - p;
297
6061
  return length >= 2 &&
298

12861
    IsWindowsDriveLetter(p[0], p[1]) &&
299
36
    (length == 2 ||
300
36
      p[2] == '/' ||
301
14
      p[2] == '\\' ||
302
6
      p[2] == '?' ||
303
6768
      p[2] == '#');
304
}
305
306
#if defined(NODE_HAVE_I18N_SUPPORT)
307
195
bool ToUnicode(const std::string& input, std::string* output) {
308
390
  MaybeStackBuffer<char> buf;
309
195
  if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0)
310
    return false;
311
195
  output->assign(*buf, buf.length());
312
195
  return true;
313
}
314
315
4420
bool ToASCII(const std::string& input, std::string* output) {
316
8840
  MaybeStackBuffer<char> buf;
317
4420
  if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0)
318
124
    return false;
319
4296
  if (buf.length() == 0)
320
24
    return false;
321
4272
  output->assign(*buf, buf.length());
322
4272
  return true;
323
}
324
#else  // !defined(NODE_HAVE_I18N_SUPPORT)
325
// Intentional non-ops if ICU is not present.
326
// Pass-through variant: copies `input` to `*output` and always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
330
331
// Pass-through variant: copies `input` to `*output` and always succeeds.
bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
335
#endif  // !defined(NODE_HAVE_I18N_SUPPORT)
336
337
#define NS_IN6ADDRSZ 16
338
339
351
void URLHost::ParseIPv6Host(const char* input, size_t length) {
340
351
  CHECK_EQ(type_, HostType::H_FAILED);
341
342
  unsigned char buf[sizeof(struct in6_addr)];
343
351
  MaybeStackBuffer<char> ipv6(length + 1);
344
351
  *(*ipv6 + length) = 0;
345
351
  memset(buf, 0, sizeof(buf));
346
351
  memcpy(*ipv6, input, sizeof(const char) * length);
347
348
351
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
349
350
351
  if (ret != 0) {
351
92
    return;
352
  }
353
354
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
355
2331
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
356
2072
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
357
  }
358
359
259
  type_ = HostType::H_IPV6;
360
}
361
362
5075
// Parses one IPv4 "number" from the half-open range [start, end).
//
// The radix is chosen from the prefix:
//   - "0x" / "0X"                          -> hexadecimal (prefix skipped)
//   - a leading '0' followed by more text  -> octal (leading '0' skipped)
//   - anything else                        -> decimal
// An input that is empty (or empty after a hex prefix) yields 0.
//
// Returns the parsed value, or -1 if any character is not a valid digit for
// the selected radix. Values that do not fit in int64_t saturate at
// INT64_MAX, which callers treat as "too large".
//
// Only bytes inside [start, end) are read; the result does not depend on
// what follows `end` in the caller's buffer.
int64_t ParseNumber(const char* start, const char* end) {
  unsigned radix = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    radix = 16;
  }
  if (start == end)
    return 0;
  if (radix == 10 && end - start > 1 && start[0] == '0') {
    start++;
    radix = 8;
  }

  int64_t value = 0;
  bool saturated = false;
  for (const char* p = start; p < end; p++) {
    const char ch = *p;
    unsigned digit;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
      digit = 10 + (ch - 'a');
    else if (ch >= 'A' && ch <= 'F')
      digit = 10 + (ch - 'A');
    else
      return -1;
    if (digit >= radix)
      return -1;

    // Keep validating the remaining characters after overflow so that an
    // invalid digit later in the input still produces -1.
    if (saturated)
      continue;
    const int64_t step = static_cast<int64_t>(radix);
    const int64_t add = static_cast<int64_t>(digit);
    if (value > (INT64_MAX - add) / step) {
      value = INT64_MAX;
      saturated = true;
    } else {
      value = value * step + add;
    }
  }
  return value;
}
396
397
4097
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
398
4097
  CHECK_EQ(type_, HostType::H_FAILED);
399
4097
  *is_ipv4 = false;
400
4097
  const char* pointer = input;
401
4097
  const char* mark = input;
402
4097
  const char* end = pointer + length;
403
4097
  int parts = 0;
404
4097
  uint32_t val = 0;
405
  uint64_t numbers[4];
406
4097
  int tooBigNumbers = 0;
407
4097
  if (length == 0)
408
3761
    return;
409
410
38335
  while (pointer <= end) {
411
37975
    const char ch = pointer < end ? pointer[0] : kEOL;
412
37975
    int64_t remaining = end - pointer - 1;
413

37975
    if (ch == '.' || ch == kEOL) {
414
      // If parts’s size is greater than 4, validation error, return failure.
415
5095
      if (++parts > static_cast<int>(arraysize(numbers))) {
416
8
        *is_ipv4 = true;
417
8
        return;
418
      }
419
5087
      if (pointer == mark)
420
12
        return;
421
5075
      int64_t n = ParseNumber(mark, pointer);
422
5075
      if (n < 0)
423
3713
        return;
424
425
1362
      if (n > 255) {
426
128
        tooBigNumbers++;
427
      }
428
1362
      numbers[parts - 1] = n;
429
1362
      mark = pointer + 1;
430

1362
      if (ch == '.' && remaining == 0)
431
4
        break;
432
    }
433
34238
    pointer++;
434
  }
435
364
  CHECK_GT(parts, 0);
436
364
  *is_ipv4 = true;
437
438
  // If any but the last item in numbers is greater than 255, return failure.
439
  // If the last item in numbers is greater than or equal to
440
  // 256^(5 - the number of items in numbers), return failure.
441
360
  if (tooBigNumbers > 1 ||
442

784
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
443
356
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
444
28
    return;
445
  }
446
447
336
  type_ = HostType::H_IPV4;
448
336
  val = static_cast<uint32_t>(numbers[parts - 1]);
449
1220
  for (int n = 0; n < parts - 1; n++) {
450
884
    double b = 3 - n;
451
884
    val +=
452
884
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
453
  }
454
455
336
  value_.ipv4 = val;
456
}
457
458
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
459
520
  CHECK_EQ(type_, HostType::H_FAILED);
460
520
  std::string output;
461
520
  output.reserve(length);
462
3053
  for (size_t i = 0; i < length; i++) {
463
2595
    const char ch = input[i];
464

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
465
62
      return;
466
    } else {
467
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
468
    }
469
  }
470
471
458
  SetOpaque(std::move(output));
472
}
473
474
5299
void URLHost::ParseHost(const char* input,
475
                        size_t length,
476
                        bool is_special,
477
                        bool unicode) {
478
5299
  CHECK_EQ(type_, HostType::H_FAILED);
479
5299
  const char* pointer = input;
480
481
5299
  if (length == 0)
482
1574
    return;
483
484
5299
  if (pointer[0] == '[') {
485
359
    if (pointer[length - 1] != ']')
486
8
      return;
487
351
    return ParseIPv6Host(++pointer, length - 2);
488
  }
489
490
4940
  if (!is_special)
491
520
    return ParseOpaqueHost(input, length);
492
493
  // First, we have to percent decode
494
4420
  std::string decoded = PercentDecode(input, length);
495
496
  // Then we have to punycode toASCII
497
4420
  if (!ToASCII(decoded, &decoded))
498
148
    return;
499
500
  // If any of the following characters are still present, we have to fail
501
92725
  for (size_t n = 0; n < decoded.size(); n++) {
502
88628
    const char ch = decoded[n];
503
88628
    if (IsForbiddenHostCodePoint(ch)) {
504
175
      return;
505
    }
506
  }
507
508
  // Check to see if it's an IPv4 IP address
509
  bool is_ipv4;
510
4097
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
511
4097
  if (is_ipv4)
512
372
    return;
513
514
  // If the unicode flag is set, run the result through punycode ToUnicode
515

3725
  if (unicode && !ToUnicode(decoded, &decoded))
516
    return;
517
518
  // It's not an IPv4 or IPv6 address, it must be a domain
519
3725
  SetDomain(std::move(decoded));
520
}
521
522
// Locates the longest sequence of 0 segments in an IPv6 address
523
// in order to use the :: compression when serializing
524
// Returns a pointer to the first element of the longest run of zeros in
// values[0, len), or nullptr if no run is longer than one element. When
// several runs share the maximum length the earliest one wins.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best = nullptr;
  unsigned best_length = 1;  // A run must be strictly longer than this.

  T* run_start = nullptr;
  unsigned run_length = 0;

  // Iterate one step past the end so a run touching the end is closed by
  // the same code path as a run closed by a non-zero element.
  for (size_t i = 0; i <= len; i++) {
    if (i < len && values[i] == 0) {
      if (run_length == 0)
        run_start = values + i;
      run_length++;
      continue;
    }
    if (run_length > best_length) {
      best_length = run_length;
      best = run_start;
    }
    run_length = 0;
    run_start = nullptr;
  }
  return best;
}
552
553
4778
std::string URLHost::ToStringMove() {
554
4778
  std::string return_value;
555
4778
  switch (type_) {
556
4183
    case HostType::H_DOMAIN:
557
    case HostType::H_OPAQUE:
558
4183
      return_value = std::move(value_.domain_or_opaque);
559
4183
      break;
560
595
    default:
561
595
      return_value = ToString();
562
595
      break;
563
  }
564
4778
  Reset();
565
4778
  return return_value;
566
}
567
568
595
std::string URLHost::ToString() const {
569
1190
  std::string dest;
570

595
  switch (type_) {
571
    case HostType::H_DOMAIN:
572
    case HostType::H_OPAQUE:
573
      return value_.domain_or_opaque;
574
336
    case HostType::H_IPV4: {
575
336
      dest.reserve(15);
576
336
      uint32_t value = value_.ipv4;
577
1680
      for (int n = 0; n < 4; n++) {
578
1344
        dest.insert(0, std::to_string(value % 256));
579
1344
        if (n < 3)
580
1008
          dest.insert(0, 1, '.');
581
1344
        value /= 256;
582
      }
583
336
      break;
584
    }
585
259
    case HostType::H_IPV6: {
586
259
      dest.reserve(41);
587
259
      dest += '[';
588
259
      const uint16_t* start = &value_.ipv6[0];
589
      const uint16_t* compress_pointer =
590
259
          FindLongestZeroSequence(start, 8);
591
259
      bool ignore0 = false;
592
2331
      for (int n = 0; n <= 7; n++) {
593
2072
        const uint16_t* piece = &value_.ipv6[n];
594

2072
        if (ignore0 && *piece == 0)
595
1743
          continue;
596
584
        else if (ignore0)
597
249
          ignore0 = false;
598
584
        if (compress_pointer == piece) {
599
255
          dest += n == 0 ? "::" : ":";
600
255
          ignore0 = true;
601
255
          continue;
602
        }
603
        char buf[5];
604
329
        snprintf(buf, sizeof(buf), "%x", *piece);
605
329
        dest += buf;
606
329
        if (n < 7)
607
76
          dest += ':';
608
      }
609
259
      dest += ']';
610
259
      break;
611
    }
612
    case HostType::H_FAILED:
613
      break;
614
  }
615
595
  return dest;
616
}
617
618
4955
bool ParseHost(const std::string& input,
619
               std::string* output,
620
               bool is_special,
621
               bool unicode = false) {
622
4955
  if (input.empty()) {
623
92
    output->clear();
624
92
    return true;
625
  }
626
9726
  URLHost host;
627
4863
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
628
4863
  if (host.ParsingFailed())
629
497
    return false;
630
4366
  *output = host.ToStringMove();
631
4366
  return true;
632
}
633
634
8906
std::vector<std::string> FromJSStringArray(Environment* env,
635
                                           Local<Array> array) {
636
8906
  std::vector<std::string> vec;
637
8906
  if (array->Length() > 0)
638
8890
    vec.reserve(array->Length());
639
134912
  for (size_t n = 0; n < array->Length(); n++) {
640
117100
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
641
117100
    if (val->IsString()) {
642
58550
      Utf8Value value(env->isolate(), val.As<String>());
643
58550
      vec.emplace_back(*value, value.length());
644
    }
645
  }
646
8906
  return vec;
647
}
648
649
8906
// Builds a url_data from the properties of the JS object `base_obj`.
// flags and port are copied when they are int32 values; scheme is always
// converted to a string; username, password, host, query and fragment are
// copied when they are strings; path is copied when it is an array.
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  // Reads one property of base_obj.
  const auto get_property = [&](Local<String> name) -> Local<Value> {
    return base_obj->Get(env->context(), name).ToLocalChecked();
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  Local<Value> scheme = get_property(env->scheme_string());
  base.scheme = Utf8Value(env->isolate(), scheme).out();

  // Copies a string property into `base.*member`. `flag` is set when the
  // property is a string and either is non-empty or `empty_as_present`.
  const auto copy_string = [&](std::string url_data::*member,
                               int flag,
                               Local<String> name,
                               bool empty_as_present) {
    Local<Value> value = get_property(name);
    if (!value->IsString())
      return;
    Utf8Value utf8value(env->isolate(), value.As<String>());
    (base.*member).assign(*utf8value, utf8value.length());
    if (empty_as_present || value.As<String>()->Length() != 0)
      base.flags |= flag;
  };

  copy_string(&url_data::username,
              URL_FLAGS_HAS_USERNAME,
              env->username_string(),
              false);
  copy_string(&url_data::password,
              URL_FLAGS_HAS_PASSWORD,
              env->password_string(),
              false);
  copy_string(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
  copy_string(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
  copy_string(&url_data::fragment,
              URL_FLAGS_HAS_FRAGMENT,
              env->fragment_string(),
              true);

  Local<Value> path = get_property(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
703
704
42271
// Builds a url_data from the JS object `context_obj`. Only a subset of the
// flags is carried over (see kCopyFlagsMask). username and password are read
// only when the corresponding flag is set, and must then be strings.
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;

  // Reads one property of context_obj.
  const auto get_property = [&](Local<String> name) -> Local<Value> {
    return context_obj->Get(env->context(), name).ToLocalChecked();
  };
  // Stores the UTF-8 form of `source` in `*target`.
  const auto assign_utf8 = [&](std::string* target, Local<Value> source) {
    Utf8Value value(env->isolate(), source);
    target->assign(*value, value.length());
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = get_property(env->scheme_string());
  if (scheme->IsString())
    assign_utf8(&context.scheme, scheme);

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = get_property(env->username_string());
    CHECK(username->IsString());
    assign_utf8(&context.username, username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = get_property(env->password_string());
    CHECK(password->IsString());
    assign_utf8(&context.password, password);
  }

  Local<Value> host = get_property(env->host_string());
  if (host->IsString())
    assign_utf8(&context.host, host);

  return context;
}
752
753
// Single dot segment can be ".", "%2e", or "%2E"
754
2470631
// True when `str` is exactly "." or its percent-encoded form "%2e"
// (the hex letter may be in either case).
bool IsSingleDotSegment(const std::string& str) {
  const size_t size = str.size();
  if (size == 1)
    return str[0] == '.';
  if (size != 3)
    return false;
  return str[0] == '%' &&
         str[1] == '2' &&
         (str[2] == 'e' || str[2] == 'E');
}
766
767
// Double dot segment can be:
768
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
769
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
770
1240229
// True when `str` consists of exactly two dots, where each dot may be
// written either literally (".") or percent-encoded ("%2e", hex letter in
// either case).
bool IsDoubleDotSegment(const std::string& str) {
  const size_t size = str.size();
  size_t pos = 0;
  for (int dots = 0; dots < 2; dots++) {
    if (pos < size && str[pos] == '.') {
      pos += 1;
    } else if (size - pos >= 3 &&
               str[pos] == '%' &&
               str[pos + 1] == '2' &&
               (str[pos + 2] == 'e' || str[pos + 2] == 'E')) {
      pos += 3;
    } else {
      return false;
    }
  }
  // Nothing may follow the second dot.
  return pos == size;
}
796
797
11348
void ShortenUrlPath(struct url_data* url) {
798
11348
  if (url->path.empty()) return;
799


11549
  if (url->path.size() == 1 && url->scheme == "file:" &&
800
604
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
801
10945
  url->path.pop_back();
802
}
803
804
}  // anonymous namespace
805
806
223136
void URL::Parse(const char* input,
807
                size_t len,
808
                enum url_parse_state state_override,
809
                struct url_data* url,
810
                bool has_url,
811
                const struct url_data* base,
812
                bool has_base) {
813
223136
  const char* p = input;
814
223136
  const char* end = input + len;
815
816
223136
  if (!has_url) {
817
146298
    for (const char* ptr = p; ptr < end; ptr++) {
818
146279
      if (IsC0ControlOrSpace(*ptr))
819
56
        p++;
820
      else
821
146223
        break;
822
    }
823
146290
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
824
146271
      if (IsC0ControlOrSpace(*ptr))
825
48
        end--;
826
      else
827
146223
        break;
828
    }
829
146242
    input = p;
830
146242
    len = end - p;
831
  }
832
833
  // The spec says we should strip out any ASCII tabs or newlines.
834
  // In those cases, we create another std::string instance with the filtered
835
  // contents, but in the general case we avoid the overhead.
836
223136
  std::string whitespace_stripped;
837
14133583
  for (const char* ptr = p; ptr < end; ptr++) {
838
13910617
    if (!IsASCIITabOrNewline(*ptr))
839
13910447
      continue;
840
    // Hit tab or newline. Allocate storage, copy what we have until now,
841
    // and then iterate and filter all similar characters out.
842
170
    whitespace_stripped.reserve(len - 1);
843
170
    whitespace_stripped.assign(p, ptr - p);
844
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
845
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
846
853
      if (!IsASCIITabOrNewline(*ptr))
847
769
        whitespace_stripped += *ptr;
848
    }
849
850
    // Update variables like they should have looked like if the string
851
    // had been stripped of whitespace to begin with.
852
170
    input = whitespace_stripped.c_str();
853
170
    len = whitespace_stripped.size();
854
170
    p = input;
855
170
    end = input + len;
856
170
    break;
857
  }
858
859
223136
  bool atflag = false;  // Set when @ has been seen.
860
223136
  bool square_bracket_flag = false;  // Set inside of [...]
861
223136
  bool password_token_seen_flag = false;  // Set after a : after an username.
862
863
223136
  std::string buffer;
864
865
  // Set the initial parse state.
866
223136
  const bool has_state_override = state_override != kUnknownState;
867
223136
  enum url_parse_state state = has_state_override ? state_override :
868
                                                    kSchemeStart;
869
870

223136
  if (state < kSchemeStart || state > kFragment) {
871
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
872
    return;
873
  }
874
875
14651322
  while (p <= end) {
876
14436997
    const char ch = p < end ? p[0] : kEOL;
877
14436997
    bool special = (url->flags & URL_FLAGS_SPECIAL);
878
    bool cannot_be_base;
879

14436997
    bool special_back_slash = (special && ch == '\\');
880
881





14436997
    switch (state) {
882
146325
      case kSchemeStart:
883
146325
        if (IsASCIIAlpha(ch)) {
884
133267
          buffer += ASCIILowercase(ch);
885
133267
          state = kScheme;
886
13058
        } else if (!has_state_override) {
887
13048
          state = kNoScheme;
888
13048
          continue;
889
        } else {
890
10
          url->flags |= URL_FLAGS_FAILED;
891
10
          return;
892
        }
893
133267
        break;
894
551458
      case kScheme:
895


551458
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
896
418191
          buffer += ASCIILowercase(ch);
897

133267
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
898

131262
          if (has_state_override && buffer.size() == 0) {
899
            url->flags |= URL_FLAGS_TERMINATED;
900
            return;
901
          }
902
131262
          buffer += ':';
903
904
131262
          bool new_is_special = IsSpecial(buffer);
905
906
131262
          if (has_state_override) {
907
45
            if ((special != new_is_special) ||
908
45
                ((buffer == "file:") &&
909
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
910
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
911


116
                  (url->port != -1))) ||
912
45
                  (url->scheme == "file:" && url->host.empty())) {
913
32
              url->flags |= URL_FLAGS_TERMINATED;
914
32
              return;
915
            }
916
          }
917
918
131230
          url->scheme = std::move(buffer);
919
131230
          url->port = NormalizePort(url->scheme, url->port);
920
131230
          if (new_is_special) {
921
125474
            url->flags |= URL_FLAGS_SPECIAL;
922
125474
            special = true;
923
          } else {
924
5756
            url->flags &= ~URL_FLAGS_SPECIAL;
925
5756
            special = false;
926
          }
927
          // `special_back_slash` equals to `(special && ch == '\\')` and `ch`
928
          // here always not equals to `\\`. So `special_back_slash` here always
929
          // equals to `false`.
930
131230
          special_back_slash = false;
931
131230
          buffer.clear();
932
131230
          if (has_state_override)
933
33
            return;
934
131197
          if (url->scheme == "file:") {
935
121508
            state = kFile;
936
3949
          } else if (special &&
937

13638
                     has_base &&
938
1041
                     url->scheme == base->scheme) {
939
331
            state = kSpecialRelativeOrAuthority;
940
9358
          } else if (special) {
941
3618
            state = kSpecialAuthoritySlashes;
942

5740
          } else if (p + 1 < end && p[1] == '/') {
943
714
            state = kPathOrAuthority;
944
714
            p++;
945
          } else {
946
5026
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
947
5026
            url->flags |= URL_FLAGS_HAS_PATH;
948
5026
            url->path.emplace_back("");
949
5026
            state = kCannotBeBase;
950
131197
          }
951
2005
        } else if (!has_state_override) {
952
1997
          buffer.clear();
953
1997
          state = kNoScheme;
954
1997
          p = input;
955
1997
          continue;
956
        } else {
957
8
          url->flags |= URL_FLAGS_FAILED;
958
8
          return;
959
        }
960
549388
        break;
961
15045
      case kNoScheme:
962

15045
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
963

15045
        if (!has_base || (cannot_be_base && ch != '#')) {
964
7819
          url->flags |= URL_FLAGS_FAILED;
965
7819
          return;
966

7226
        } else if (cannot_be_base && ch == '#') {
967
28
          url->scheme = base->scheme;
968
28
          if (IsSpecial(url->scheme)) {
969
            url->flags |= URL_FLAGS_SPECIAL;
970
            special = true;
971
          } else {
972
28
            url->flags &= ~URL_FLAGS_SPECIAL;
973
28
            special = false;
974
          }
975

28
          special_back_slash = (special && ch == '\\');
976
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
977
28
            url->flags |= URL_FLAGS_HAS_PATH;
978
28
            url->path = base->path;
979
          }
980
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
981
4
            url->flags |= URL_FLAGS_HAS_QUERY;
982
4
            url->query = base->query;
983
          }
984
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
985
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
986
            url->fragment = base->fragment;
987
          }
988
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
989
28
          state = kFragment;
990

14396
        } else if (has_base &&
991
7198
                   base->scheme != "file:") {
992
413
          state = kRelative;
993
413
          continue;
994
        } else {
995
6785
          url->scheme = "file:";
996
6785
          url->flags |= URL_FLAGS_SPECIAL;
997
6785
          special = true;
998
6785
          state = kFile;
999

6785
          special_back_slash = (special && ch == '\\');
1000
6785
          continue;
1001
        }
1002
28
        break;
1003
331
      case kSpecialRelativeOrAuthority:
1004

331
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1005
299
          state = kSpecialAuthorityIgnoreSlashes;
1006
299
          p++;
1007
        } else {
1008
32
          state = kRelative;
1009
32
          continue;
1010
        }
1011
299
        break;
1012
714
      case kPathOrAuthority:
1013
714
        if (ch == '/') {
1014
546
          state = kAuthority;
1015
        } else {
1016
168
          state = kPath;
1017
168
          continue;
1018
        }
1019
546
        break;
1020
445
      case kRelative:
1021
445
        url->scheme = base->scheme;
1022
445
        if (IsSpecial(url->scheme)) {
1023
345
          url->flags |= URL_FLAGS_SPECIAL;
1024
345
          special = true;
1025
        } else {
1026
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1027
100
          special = false;
1028
        }
1029

445
        special_back_slash = (special && ch == '\\');
1030

445
        switch (ch) {
1031
18
          case kEOL:
1032
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1033
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1034
4
              url->username = base->username;
1035
            }
1036
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1037
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1038
4
              url->password = base->password;
1039
            }
1040
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1041
16
              url->flags |= URL_FLAGS_HAS_HOST;
1042
16
              url->host = base->host;
1043
            }
1044
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1045
              url->flags |= URL_FLAGS_HAS_QUERY;
1046
              url->query = base->query;
1047
            }
1048
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1049
18
              url->flags |= URL_FLAGS_HAS_PATH;
1050
18
              url->path = base->path;
1051
            }
1052
18
            url->port = base->port;
1053
18
            break;
1054
154
          case '/':
1055
154
            state = kRelativeSlash;
1056
154
            break;
1057
38
          case '?':
1058
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1059
              url->flags |= URL_FLAGS_HAS_USERNAME;
1060
              url->username = base->username;
1061
            }
1062
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1063
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1064
              url->password = base->password;
1065
            }
1066
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1067
34
              url->flags |= URL_FLAGS_HAS_HOST;
1068
34
              url->host = base->host;
1069
            }
1070
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1071
38
              url->flags |= URL_FLAGS_HAS_PATH;
1072
38
              url->path = base->path;
1073
            }
1074
38
            url->port = base->port;
1075
38
            state = kQuery;
1076
38
            break;
1077
38
          case '#':
1078
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1079
              url->flags |= URL_FLAGS_HAS_USERNAME;
1080
              url->username = base->username;
1081
            }
1082
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1083
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1084
              url->password = base->password;
1085
            }
1086
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1087
34
              url->flags |= URL_FLAGS_HAS_HOST;
1088
34
              url->host = base->host;
1089
            }
1090
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1091
              url->flags |= URL_FLAGS_HAS_QUERY;
1092
              url->query = base->query;
1093
            }
1094
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1095
38
              url->flags |= URL_FLAGS_HAS_PATH;
1096
38
              url->path = base->path;
1097
            }
1098
38
            url->port = base->port;
1099
38
            state = kFragment;
1100
38
            break;
1101
197
          default:
1102
197
            if (special_back_slash) {
1103
18
              state = kRelativeSlash;
1104
            } else {
1105
179
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1106
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1107
1
                url->username = base->username;
1108
              }
1109
179
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1110
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1111
1
                url->password = base->password;
1112
              }
1113
179
              if (base->flags & URL_FLAGS_HAS_HOST) {
1114
159
                url->flags |= URL_FLAGS_HAS_HOST;
1115
159
                url->host = base->host;
1116
              }
1117
179
              if (base->flags & URL_FLAGS_HAS_PATH) {
1118
179
                url->flags |= URL_FLAGS_HAS_PATH;
1119
179
                url->path = base->path;
1120
179
                ShortenUrlPath(url);
1121
              }
1122
179
              url->port = base->port;
1123
179
              state = kPath;
1124
179
              continue;
1125
            }
1126
        }
1127
266
        break;
1128
172
      case kRelativeSlash:
1129


172
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1130
22
          state = kSpecialAuthorityIgnoreSlashes;
1131
150
        } else if (ch == '/') {
1132
6
          state = kAuthority;
1133
        } else {
1134
144
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1135
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1136
8
            url->username = base->username;
1137
          }
1138
144
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1139
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1140
4
            url->password = base->password;
1141
          }
1142
144
          if (base->flags & URL_FLAGS_HAS_HOST) {
1143
136
            url->flags |= URL_FLAGS_HAS_HOST;
1144
136
            url->host = base->host;
1145
          }
1146
144
          url->port = base->port;
1147
144
          state = kPath;
1148
144
          continue;
1149
        }
1150
28
        break;
1151
3618
      case kSpecialAuthoritySlashes:
1152
3618
        state = kSpecialAuthorityIgnoreSlashes;
1153

3618
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1154
3469
          p++;
1155
        } else {
1156
149
          continue;
1157
        }
1158
3469
        break;
1159
4016
      case kSpecialAuthorityIgnoreSlashes:
1160

4016
        if (ch != '/' && ch != '\\') {
1161
3939
          state = kAuthority;
1162
3939
          continue;
1163
        }
1164
77
        break;
1165
94669
      case kAuthority:
1166
94669
        if (ch == '@') {
1167
565
          if (atflag) {
1168
41
            buffer.reserve(buffer.size() + 3);
1169
41
            buffer.insert(0, "%40");
1170
          }
1171
565
          atflag = true;
1172
565
          size_t blen = buffer.size();
1173

565
          if (blen > 0 && buffer[0] != ':') {
1174
469
            url->flags |= URL_FLAGS_HAS_USERNAME;
1175
          }
1176
6652
          for (size_t n = 0; n < blen; n++) {
1177
6087
            const char bch = buffer[n];
1178
6087
            if (bch == ':') {
1179
444
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1180
444
              if (!password_token_seen_flag) {
1181
428
                password_token_seen_flag = true;
1182
428
                continue;
1183
              }
1184
            }
1185
5659
            if (password_token_seen_flag) {
1186
2722
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1187
            } else {
1188
2937
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1189
            }
1190
          }
1191
565
          buffer.clear();
1192

94104
        } else if (ch == kEOL ||
1193
89683
                   ch == '/' ||
1194
89651
                   ch == '?' ||
1195
89633
                   ch == '#' ||
1196
                   special_back_slash) {
1197

4491
          if (atflag && buffer.size() == 0) {
1198
52
            url->flags |= URL_FLAGS_FAILED;
1199
52
            return;
1200
          }
1201
4439
          p -= buffer.size() + 1;
1202
4439
          buffer.clear();
1203
4439
          state = kHost;
1204
        } else {
1205
89613
          buffer += ch;
1206
        }
1207
94617
        break;
1208
85822
      case kHost:
1209
      case kHostname:
1210

85822
        if (has_state_override && url->scheme == "file:") {
1211
12
          state = kFileHost;
1212
12
          continue;
1213

85810
        } else if (ch == ':' && !square_bracket_flag) {
1214
1639
          if (buffer.size() == 0) {
1215
24
            url->flags |= URL_FLAGS_FAILED;
1216
24
            return;
1217
          }
1218
1615
          if (state_override == kHostname) {
1219
4
            return;
1220
          }
1221
1611
          url->flags |= URL_FLAGS_HAS_HOST;
1222
1611
          if (!ParseHost(buffer, &url->host, special)) {
1223
5
            url->flags |= URL_FLAGS_FAILED;
1224
5
            return;
1225
          }
1226
1606
          buffer.clear();
1227
1606
          state = kPort;
1228

84171
        } else if (ch == kEOL ||
1229
81081
                   ch == '/' ||
1230
81041
                   ch == '?' ||
1231
81015
                   ch == '#' ||
1232
                   special_back_slash) {
1233
3180
          p--;
1234

3180
          if (special && buffer.size() == 0) {
1235
21
            url->flags |= URL_FLAGS_FAILED;
1236
21
            return;
1237
          }
1238
331
          if (has_state_override &&
1239

3528
              buffer.size() == 0 &&
1240
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1241
38
               url->port != -1)) {
1242
8
            url->flags |= URL_FLAGS_TERMINATED;
1243
8
            return;
1244
          }
1245
3151
          url->flags |= URL_FLAGS_HAS_HOST;
1246
3151
          if (!ParseHost(buffer, &url->host, special)) {
1247
440
            url->flags |= URL_FLAGS_FAILED;
1248
440
            return;
1249
          }
1250
2711
          buffer.clear();
1251
2711
          state = kPathStart;
1252
2711
          if (has_state_override) {
1253
227
            return;
1254
          }
1255
        } else {
1256
80991
          if (ch == '[')
1257
353
            square_bracket_flag = true;
1258
80991
          if (ch == ']')
1259
349
            square_bracket_flag = false;
1260
80991
          buffer += ch;
1261
        }
1262
85081
        break;
1263
9281
      case kPort:
1264
9281
        if (IsASCIIDigit(ch)) {
1265
7606
          buffer += ch;
1266

1675
        } else if (has_state_override ||
1267
1137
                   ch == kEOL ||
1268
36
                   ch == '/' ||
1269
36
                   ch == '?' ||
1270
36
                   ch == '#' ||
1271
                   special_back_slash) {
1272
1639
          if (buffer.size() > 0) {
1273
1625
            unsigned port = 0;
1274
            // the condition port <= 0xffff prevents integer overflow
1275

9015
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1276
7390
              port = port * 10 + buffer[i] - '0';
1277
1625
            if (port > 0xffff) {
1278
              // TODO(TimothyGu): This hack is currently needed for the host
1279
              // setter since it needs access to hostname if it is valid, and
1280
              // if the FAILED flag is set the entire response to JS layer
1281
              // will be empty.
1282
26
              if (state_override == kHost)
1283
2
                url->port = -1;
1284
              else
1285
24
                url->flags |= URL_FLAGS_FAILED;
1286
26
              return;
1287
            }
1288
            // the port is valid
1289
1599
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1290
1599
            if (url->port == -1)
1291
47
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1292
1599
            buffer.clear();
1293
14
          } else if (has_state_override) {
1294
            // TODO(TimothyGu): Similar case as above.
1295
6
            if (state_override == kHost)
1296
2
              url->port = -1;
1297
            else
1298
4
              url->flags |= URL_FLAGS_TERMINATED;
1299
6
            return;
1300
          }
1301
1607
          state = kPathStart;
1302
1607
          continue;
1303
        } else {
1304
36
          url->flags |= URL_FLAGS_FAILED;
1305
36
          return;
1306
        }
1307
7606
        break;
1308
128293
      case kFile:
1309
128293
        url->scheme = "file:";
1310
128293
        url->host.clear();
1311
128293
        url->flags |= URL_FLAGS_HAS_HOST;
1312

128293
        if (ch == '/' || ch == '\\') {
1313
121624
          state = kFileSlash;
1314

6669
        } else if (has_base && base->scheme == "file:") {
1315

6650
          switch (ch) {
1316
4
            case kEOL:
1317
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1318
4
                url->host = base->host;
1319
              }
1320
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1321
4
                url->flags |= URL_FLAGS_HAS_PATH;
1322
4
                url->path = base->path;
1323
              }
1324
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1325
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1326
4
                url->query = base->query;
1327
              }
1328
4
              break;
1329
4
            case '?':
1330
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1331
4
                url->host = base->host;
1332
              }
1333
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1334
4
                url->flags |= URL_FLAGS_HAS_PATH;
1335
4
                url->path = base->path;
1336
              }
1337
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1338
4
              url->query.clear();
1339
4
              state = kQuery;
1340
4
              break;
1341
4
            case '#':
1342
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1343
4
                url->host = base->host;
1344
              }
1345
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1346
4
                url->flags |= URL_FLAGS_HAS_PATH;
1347
4
                url->path = base->path;
1348
              }
1349
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1350
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1351
4
                url->query = base->query;
1352
              }
1353
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1354
4
              url->fragment.clear();
1355
4
              state = kFragment;
1356
4
              break;
1357
6638
            default:
1358
6638
              url->query.clear();
1359
6638
              if (base->flags & URL_FLAGS_HAS_HOST) {
1360
6638
                url->host = base->host;
1361
              }
1362
6638
              if (base->flags & URL_FLAGS_HAS_PATH) {
1363
6638
                url->flags |= URL_FLAGS_HAS_PATH;
1364
6638
                url->path = base->path;
1365
              }
1366
6638
              if (!StartsWithWindowsDriveLetter(p, end)) {
1367
6614
                ShortenUrlPath(url);
1368
              } else {
1369
24
                url->path.clear();
1370
              }
1371
6638
              state = kPath;
1372
6638
              continue;
1373
          }
1374
        } else {
1375
19
          state = kPath;
1376
19
          continue;
1377
        }
1378
121636
        break;
1379
121624
      case kFileSlash:
1380

121624
        if (ch == '/' || ch == '\\') {
1381
121484
          state = kFileHost;
1382
        } else {
1383

140
          if (has_base && base->scheme == "file:") {
1384
126
            url->flags |= URL_FLAGS_HAS_HOST;
1385
126
            url->host = base->host;
1386

238
            if (!StartsWithWindowsDriveLetter(p, end) &&
1387
112
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1388
4
              url->flags |= URL_FLAGS_HAS_PATH;
1389
4
              url->path.push_back(base->path[0]);
1390
            }
1391
          }
1392
140
          state = kPath;
1393
140
          continue;
1394
        }
1395
121484
        break;
1396
122601
      case kFileHost:
1397

122601
        if (ch == kEOL ||
1398
1115
            ch == '/' ||
1399
1105
            ch == '\\' ||
1400
1105
            ch == '?' ||
1401
            ch == '#') {
1402
121484
          if (!has_state_override &&
1403

242980
              buffer.size() == 2 &&
1404
22
              IsWindowsDriveLetter(buffer)) {
1405
12
            state = kPath;
1406
121484
          } else if (buffer.size() == 0) {
1407
121291
            url->flags |= URL_FLAGS_HAS_HOST;
1408
121291
            url->host.clear();
1409
121291
            if (has_state_override)
1410
4
              return;
1411
121287
            state = kPathStart;
1412
          } else {
1413
193
            std::string host;
1414
193
            if (!ParseHost(buffer, &host, special)) {
1415
52
              url->flags |= URL_FLAGS_FAILED;
1416
52
              return;
1417
            }
1418
141
            if (host == "localhost")
1419
37
              host.clear();
1420
141
            url->flags |= URL_FLAGS_HAS_HOST;
1421
141
            url->host = host;
1422
141
            if (has_state_override)
1423
4
              return;
1424
137
            buffer.clear();
1425
137
            state = kPathStart;
1426
          }
1427
121436
          continue;
1428
        } else {
1429
1105
          buffer += ch;
1430
        }
1431
1105
        break;
1432
201632
      case kPathStart:
1433
201632
        if (IsSpecial(url->scheme)) {
1434
201086
          state = kPath;
1435

201086
          if (ch != '/' && ch != '\\') {
1436
76864
            continue;
1437
          }
1438

546
        } else if (!has_state_override && ch == '?') {
1439
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1440
6
          url->query.clear();
1441
6
          state = kQuery;
1442

540
        } else if (!has_state_override && ch == '#') {
1443
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1444
6
          url->fragment.clear();
1445
6
          state = kFragment;
1446
534
        } else if (ch != kEOL) {
1447
457
          state = kPath;
1448
457
          if (ch != '/') {
1449
35
            continue;
1450
          }
1451

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1452
2
          url->flags |= URL_FLAGS_HAS_PATH;
1453
2
          url->path.emplace_back("");
1454
        }
1455
124733
        break;
1456
12846040
      case kPath:
1457

12846040
        if (ch == kEOL ||
1458
11606882
            ch == '/' ||
1459
11606812
            special_back_slash ||
1460

11606812
            (!has_state_override && (ch == '?' || ch == '#'))) {
1461
1240229
          if (IsDoubleDotSegment(buffer)) {
1462
4555
            ShortenUrlPath(url);
1463

4555
            if (ch != '/' && !special_back_slash) {
1464
280
              url->flags |= URL_FLAGS_HAS_PATH;
1465
280
              url->path.emplace_back("");
1466
            }
1467
1238799
          } else if (IsSingleDotSegment(buffer) &&
1468

1238799
                     ch != '/' && !special_back_slash) {
1469
717
            url->flags |= URL_FLAGS_HAS_PATH;
1470
717
            url->path.emplace_back("");
1471
1234957
          } else if (!IsSingleDotSegment(buffer)) {
1472
2459163
            if (url->scheme == "file:" &&
1473
1390107
                url->path.empty() &&
1474

2622656
                buffer.size() == 2 &&
1475
100
                IsWindowsDriveLetter(buffer)) {
1476
98
              buffer[1] = ':';
1477
            }
1478
1232549
            url->flags |= URL_FLAGS_HAS_PATH;
1479
1232549
            url->path.emplace_back(std::move(buffer));
1480
          }
1481
1240229
          buffer.clear();
1482
2480458
          if (ch == '?') {
1483
946
            url->flags |= URL_FLAGS_HAS_QUERY;
1484
946
            url->query.clear();
1485
946
            state = kQuery;
1486
1239283
          } else if (ch == '#') {
1487
55
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1488
55
            url->fragment.clear();
1489
55
            state = kFragment;
1490
          }
1491
        } else {
1492
11605811
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1493
        }
1494
12846040
        break;
1495
41046
      case kCannotBeBase:
1496
41046
        switch (ch) {
1497
4
          case '?':
1498
4
            state = kQuery;
1499
4
            break;
1500
10
          case '#':
1501
10
            state = kFragment;
1502
10
            break;
1503
41032
          default:
1504
41032
            if (url->path.empty())
1505
              url->path.emplace_back("");
1506
41032
            else if (ch != kEOL)
1507
36020
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1508
        }
1509
41046
        break;
1510
59610
      case kQuery:
1511

59610
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1512
1157
          url->flags |= URL_FLAGS_HAS_QUERY;
1513
1157
          url->query = std::move(buffer);
1514
1157
          buffer.clear();
1515
1546
          if (ch == '#')
1516
389
            state = kFragment;
1517
        } else {
1518
58453
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1519
                                                QUERY_ENCODE_SET_NONSPECIAL);
1520
        }
1521
59610
        break;
1522
4255
      case kFragment:
1523
4255
        switch (ch) {
1524
604
          case kEOL:
1525
604
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1526
604
            url->fragment = std::move(buffer);
1527
604
            break;
1528
3651
          default:
1529
3651
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1530
        }
1531
4255
        break;
1532
      default:
1533
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1534
        return;
1535
    }
1536
1537
14194581
    p++;
1538
  }
1539
}  // NOLINT(readability/fn_size)
1540
1541
// https://url.spec.whatwg.org/#url-serializing
1542
34623
std::string URL::SerializeURL(const url_data& url,
1543
                              bool exclude = false) {
1544
34623
  std::string output;
1545
34623
  output.reserve(
1546
    10 +  // We generally insert < 10 separator characters between URL parts
1547
34623
    url.scheme.size() +
1548
34623
    url.username.size() +
1549
34623
    url.password.size() +
1550
34623
    url.host.size() +
1551
34623
    url.query.size() +
1552
34623
    url.fragment.size() +
1553
34623
    url.href.size() +
1554
34623
    std::accumulate(
1555
        url.path.begin(),
1556
        url.path.end(),
1557
        0,
1558
367230
        [](size_t sum, const auto& str) { return sum + str.size(); }));
1559
1560
34623
  output += url.scheme;
1561
34623
  if (url.flags & URL_FLAGS_HAS_HOST) {
1562
34623
    output += "//";
1563
34623
    if (url.flags & URL_FLAGS_HAS_USERNAME ||
1564
34623
        url.flags & URL_FLAGS_HAS_PASSWORD) {
1565
      if (url.flags & URL_FLAGS_HAS_USERNAME) {
1566
        output += url.username;
1567
      }
1568
      if (url.flags & URL_FLAGS_HAS_PASSWORD) {
1569
        output += ":" + url.password;
1570
      }
1571
      output += "@";
1572
    }
1573
34623
    output += url.host;
1574
34623
    if (url.port != -1) {
1575
      output += ":" + std::to_string(url.port);
1576
    }
1577
  }
1578
34623
  if (url.flags & URL_FLAGS_CANNOT_BE_BASE) {
1579
    output += url.path[0];
1580
  } else {
1581
    if (!(url.flags & URL_FLAGS_HAS_HOST) &&
1582

34623
          url.path.size() > 1 &&
1583
          url.path[0].empty()) {
1584
      output += "/.";
1585
    }
1586
367230
    for (size_t i = 1; i < url.path.size(); i++) {
1587
332607
      output += "/" + url.path[i];
1588
    }
1589
  }
1590
34623
  if (url.flags & URL_FLAGS_HAS_QUERY) {
1591
    output += "?" + url.query;
1592
  }
1593

34623
  if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) {
1594
    output += "#" + url.fragment;
1595
  }
1596
34623
  output.shrink_to_fit();
1597
34623
  return output;
1598
}
1599
1600
namespace {
1601
145330
void SetArgs(Environment* env,
1602
             Local<Value> argv[ARG_COUNT],
1603
             const struct url_data& url) {
1604
145330
  Isolate* isolate = env->isolate();
1605
145330
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1606
290660
  argv[ARG_PROTOCOL] =
1607
145330
      url.flags & URL_FLAGS_SPECIAL ?
1608
139349
          GetSpecial(env, url.scheme) :
1609
5981
          OneByteString(isolate, url.scheme.c_str());
1610
145330
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1611
1224
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1612
145330
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1613
1184
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1614
145330
  if (url.flags & URL_FLAGS_HAS_HOST)
1615
280086
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1616
145330
  if (url.flags & URL_FLAGS_HAS_QUERY)
1617
2330
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1618
145330
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1619
1200
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1620
145330
  if (url.port > -1)
1621
3478
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1622
145330
  if (url.flags & URL_FLAGS_HAS_PATH)
1623
289468
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1624
145330
}
1625
1626
153861
void Parse(Environment* env,
1627
           Local<Value> recv,
1628
           const char* input,
1629
           size_t len,
1630
           enum url_parse_state state_override,
1631
           Local<Value> base_obj,
1632
           Local<Value> context_obj,
1633
           Local<Function> cb,
1634
           Local<Value> error_cb) {
1635
153861
  Isolate* isolate = env->isolate();
1636
153861
  Local<Context> context = env->context();
1637
153861
  HandleScope handle_scope(isolate);
1638
153861
  Context::Scope context_scope(context);
1639
1640
153861
  const bool has_context = context_obj->IsObject();
1641
153861
  const bool has_base = base_obj->IsObject();
1642
1643
153861
  url_data base;
1644
153861
  url_data url;
1645
153861
  if (has_context)
1646
42271
    url = HarvestContext(env, context_obj.As<Object>());
1647
153861
  if (has_base)
1648
8906
    base = HarvestBase(env, base_obj.As<Object>());
1649
1650
153861
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1651

153861
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1652
42271
      ((state_override != kUnknownState) &&
1653
42271
       (url.flags & URL_FLAGS_TERMINATED)))
1654
44
    return;
1655
1656
  // Define the return value placeholders
1657
153817
  const Local<Value> undef = Undefined(isolate);
1658
153817
  const Local<Value> null = Null(isolate);
1659
153817
  if (!(url.flags & URL_FLAGS_FAILED)) {
1660
    Local<Value> argv[] = {
1661
      undef,
1662
      undef,
1663
      undef,
1664
      undef,
1665
      null,  // host defaults to null
1666
      null,  // port defaults to null
1667
      undef,
1668
      null,  // query defaults to null
1669
      null,  // fragment defaults to null
1670
145330
    };
1671
145330
    SetArgs(env, argv, url);
1672
145330
    USE(cb->Call(context, recv, arraysize(argv), argv));
1673
8487
  } else if (error_cb->IsFunction()) {
1674
16714
    Local<Value> flags = Integer::NewFromUnsigned(isolate, url.flags);
1675
8357
    USE(error_cb.As<Function>()->Call(context, recv, 1, &flags));
1676
  }
1677
}
1678
1679
153861
void Parse(const FunctionCallbackInfo<Value>& args) {
1680
153861
  Environment* env = Environment::GetCurrent(args);
1681
153861
  CHECK_GE(args.Length(), 5);
1682
307722
  CHECK(args[0]->IsString());  // input
1683


418982
  CHECK(args[2]->IsUndefined() ||  // base context
1684
        args[2]->IsNull() ||
1685
        args[2]->IsObject());
1686


434535
  CHECK(args[3]->IsUndefined() ||  // context
1687
        args[3]->IsNull() ||
1688
        args[3]->IsObject());
1689
153861
  CHECK(args[4]->IsFunction());  // complete callback
1690

419312
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1691
1692
153861
  Utf8Value input(env->isolate(), args[0]);
1693
153861
  enum url_parse_state state_override = kUnknownState;
1694
153861
  if (args[1]->IsNumber()) {
1695
153861
    state_override = static_cast<enum url_parse_state>(
1696
307722
        args[1]->Uint32Value(env->context()).FromJust());
1697
  }
1698
1699
307722
  Parse(env, args.This(),
1700
153861
        *input, input.length(),
1701
        state_override,
1702
        args[2],
1703
        args[3],
1704
307722
        args[4].As<Function>(),
1705
        args[5]);
1706
153861
}
1707
1708
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1709
92
  Environment* env = Environment::GetCurrent(args);
1710
92
  CHECK_GE(args.Length(), 1);
1711
184
  CHECK(args[0]->IsString());
1712
184
  Utf8Value value(env->isolate(), args[0]);
1713
92
  std::string output;
1714
92
  size_t len = value.length();
1715
92
  output.reserve(len);
1716
756
  for (size_t n = 0; n < len; n++) {
1717
664
    const char ch = (*value)[n];
1718
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1719
  }
1720
276
  args.GetReturnValue().Set(
1721
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1722
92
}
1723
1724
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1725
229
  Environment* env = Environment::GetCurrent(args);
1726
229
  CHECK_GE(args.Length(), 1);
1727
458
  CHECK(args[0]->IsString());
1728
229
  Utf8Value value(env->isolate(), args[0]);
1729
1730
229
  URLHost host;
1731
  // Assuming the host is used for a special scheme.
1732
229
  host.ParseHost(*value, value.length(), true);
1733
229
  if (host.ParsingFailed()) {
1734
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1735
12
    return;
1736
  }
1737
217
  std::string out = host.ToStringMove();
1738
651
  args.GetReturnValue().Set(
1739
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1740
}
1741
1742
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1743
207
  Environment* env = Environment::GetCurrent(args);
1744
207
  CHECK_GE(args.Length(), 1);
1745
414
  CHECK(args[0]->IsString());
1746
207
  Utf8Value value(env->isolate(), args[0]);
1747
1748
207
  URLHost host;
1749
  // Assuming the host is used for a special scheme.
1750
207
  host.ParseHost(*value, value.length(), true, true);
1751
207
  if (host.ParsingFailed()) {
1752
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1753
12
    return;
1754
  }
1755
195
  std::string out = host.ToStringMove();
1756
585
  args.GetReturnValue().Set(
1757
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1758
}
1759
1760
628
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1761
628
  Environment* env = Environment::GetCurrent(args);
1762
628
  CHECK_EQ(args.Length(), 1);
1763
628
  CHECK(args[0]->IsFunction());
1764
1256
  env->set_url_constructor_function(args[0].As<Function>());
1765
628
}
1766
1767
628
void Initialize(Local<Object> target,
1768
                Local<Value> unused,
1769
                Local<Context> context,
1770
                void* priv) {
1771
628
  Environment* env = Environment::GetCurrent(context);
1772
628
  env->SetMethod(target, "parse", Parse);
1773
628
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1774
628
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1775
628
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1776
628
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1777
1778
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1779
16956
  FLAGS(XX)
1780
#undef XX
1781
1782
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1783
26376
  PARSESTATES(XX)
1784
#undef XX
1785
628
}
1786
}  // namespace
1787
1788
4950
// Registers with `registry` each native callback that Initialize attaches to
// the binding object.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  // Keep this list in step with the SetMethod* calls in Initialize.
  registry->Register(Parse);
  registry->Register(EncodeAuthSet);
  registry->Register(DomainToASCII);
  registry->Register(DomainToUnicode);
  registry->Register(SetURLConstructor);
}
1795
1796
8
std::string URL::ToFilePath() const {
1797
8
  if (context_.scheme != "file:") {
1798
1
    return "";
1799
  }
1800
1801
#ifdef _WIN32
1802
  const char* slash = "\\";
1803
  auto is_slash = [] (char ch) {
1804
    return ch == '/' || ch == '\\';
1805
  };
1806
#else
1807
7
  const char* slash = "/";
1808
46
  auto is_slash = [] (char ch) {
1809
46
    return ch == '/';
1810
  };
1811

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1812
7
      context_.host.length() > 0) {
1813
1
    return "";
1814
  }
1815
#endif
1816
12
  std::string decoded_path;
1817
18
  for (const std::string& part : context_.path) {
1818
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1819
58
    for (char& ch : decoded) {
1820
46
      if (is_slash(ch)) {
1821
1
        return "";
1822
      }
1823
    }
1824
12
    decoded_path += slash + decoded;
1825
  }
1826
1827
#ifdef _WIN32
1828
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1829
1830
  // If hostname is set, then we have a UNC path. Pass the hostname through
1831
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1832
  // need to worry about percent encoding because the URL parser will have
1833
  // already taken care of that for us. Note that this only causes IDNs with an
1834
  // appropriate `xn--` prefix to be decoded.
1835
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1836
      context_.host.length() > 0) {
1837
    std::string unicode_host;
1838
    if (!ToUnicode(context_.host, &unicode_host)) {
1839
      return "";
1840
    }
1841
    return "\\\\" + unicode_host + decoded_path;
1842
  }
1843
  // Otherwise, it's a local path that requires a drive letter.
1844
  if (decoded_path.length() < 3) {
1845
    return "";
1846
  }
1847
  if (decoded_path[2] != ':' ||
1848
      !IsASCIIAlpha(decoded_path[1])) {
1849
    return "";
1850
  }
1851
  // Strip out the leading '\'.
1852
  return decoded_path.substr(1);
1853
#else
1854
5
  return decoded_path;
1855
#endif
1856
}
1857
1858
34623
// Builds a URL from a filesystem path.
//
// Starts from the URL "file://", rewrites every '%' in `file_path` as "%25"
// (presumably so a literal percent sign is not read as the start of a
// percent-escape), and then parses the result into the same URL record
// beginning at the kPathStart state.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped_file_path;
  for (const char ch : file_path) {
    escaped_file_path.push_back(ch);
    if (ch == '%') {
      escaped_file_path.append("25");
    }
  }

  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1870
1871
// This function works by calling out to a JS function that creates and
1872
// returns the JS URL object. Be mindful of the JS<->Native boundary
1873
// crossing that is required.
1874
MaybeLocal<Value> URL::ToObject(Environment* env) const {
1875
  Isolate* isolate = env->isolate();
1876
  Local<Context> context = env->context();
1877
  Context::Scope context_scope(context);
1878
1879
  const Local<Value> undef = Undefined(isolate);
1880
  const Local<Value> null = Null(isolate);
1881
1882
  if (context_.flags & URL_FLAGS_FAILED)
1883
    return Local<Value>();
1884
1885
  Local<Value> argv[] = {
1886
    undef,
1887
    undef,
1888
    undef,
1889
    undef,
1890
    null,  // host defaults to null
1891
    null,  // port defaults to null
1892
    undef,
1893
    null,  // query defaults to null
1894
    null,  // fragment defaults to null
1895
  };
1896
  SetArgs(env, argv, context_);
1897
1898
  MaybeLocal<Value> ret;
1899
  {
1900
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
1901
1902
    // The SetURLConstructor method must have been called already to
1903
    // set the constructor function used below. SetURLConstructor is
1904
    // called automatically when the internal/url.js module is loaded
1905
    // during the internal/bootstrap/node.js processing.
1906
    ret = env->url_constructor_function()
1907
        ->Call(env->context(), undef, arraysize(argv), argv);
1908
  }
1909
1910
  return ret;
1911
}
1912
1913
}  // namespace url
1914
}  // namespace node
1915
1916
5008
// Associates the name `url` with node::url::Initialize.
// NOTE(review): presumably this registers the internal `url` binding so that
// Initialize runs when it is loaded; the macro definition is not shown here.
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1917
4950
// Associates the name `url` with node::url::RegisterExternalReferences.
// NOTE(review): presumably this makes the callbacks registered there known
// as external references for the `url` binding; confirm against the macro
// definition, which is not shown here.
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)