GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1139 1197 95.2 %
Date: 2022-07-22 04:16:17 Branches: 987 1116 88.4 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <algorithm>
9
#include <cmath>
10
#include <cstdio>
11
#include <numeric>
12
#include <string>
13
#include <vector>
14
15
namespace node {
16
17
using errors::TryCatchScope;
18
19
using url::table_data::hex;
20
using url::table_data::C0_CONTROL_ENCODE_SET;
21
using url::table_data::FRAGMENT_ENCODE_SET;
22
using url::table_data::PATH_ENCODE_SET;
23
using url::table_data::USERINFO_ENCODE_SET;
24
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
25
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
26
27
using v8::Array;
28
using v8::Context;
29
using v8::Function;
30
using v8::FunctionCallbackInfo;
31
using v8::HandleScope;
32
using v8::Int32;
33
using v8::Integer;
34
using v8::Isolate;
35
using v8::Local;
36
using v8::MaybeLocal;
37
using v8::NewStringType;
38
using v8::Null;
39
using v8::Object;
40
using v8::String;
41
using v8::Undefined;
42
using v8::Value;
43
44
155875
// Builds a V8 string from the bytes of `str` (str.data() / str.length()),
// passed to String::NewFromUtf8 with NewStringType::kNormal.
// The MaybeLocal result is unwrapped with ToLocalChecked(), so a failed
// string creation is not reported back to the caller.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
45
155875
  return String::NewFromUtf8(isolate,
46
                             str.data(),
47
                             NewStringType::kNormal,
48
155875
                             str.length()).ToLocalChecked();
49
}
50
51
namespace url {
52
namespace {
53
54
// https://url.spec.whatwg.org/#eof-code-point
// Sentinel character value used by the parsers in this file to stand for
// "one position past the last input character" (see ParseIPv4Host).
55
constexpr char kEOL = -1;
56
57
// https://url.spec.whatwg.org/#concept-host
58
// Holds the result of parsing a URL host. The payload lives in a union
// (`value_`) whose active member is selected by `type_`:
//   H_DOMAIN / H_OPAQUE -> value_.domain_or_opaque (std::string)
//   H_IPV4              -> value_.ipv4
//   H_IPV6              -> value_.ipv6
//   H_FAILED            -> no payload (also the initial state)
// Every Parse*() method CHECKs that the object is still in the H_FAILED
// state on entry, and leaves it in that state when parsing fails.
class URLHost {
59
 public:
60
  ~URLHost();
61
62
  void ParseIPv4Host(const char* input, size_t length);
63
  void ParseIPv6Host(const char* input, size_t length);
64
  void ParseOpaqueHost(const char* input, size_t length);
65
  void ParseHost(const char* input,
66
                 size_t length,
67
                 bool is_special,
68
                 bool unicode = false);
69
70
5006
  // True while no Parse*() call has succeeded (type_ is still H_FAILED).
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
71
  std::string ToString() const;
72
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
73
  std::string ToStringMove();
74
75
 private:
76
  enum class HostType {
77
    H_FAILED,
78
    H_DOMAIN,
79
    H_IPV4,
80
    H_IPV6,
81
    H_OPAQUE,
82
  };
83
84
  // Storage for the parsed host. Because one member is a std::string, the
  // union's constructor and destructor are user-provided and deliberately
  // do nothing with the string: its lifetime is managed by hand through
  // SetOpaque()/SetDomain() (placement new) and Reset() (explicit dtor call).
  union Value {
85
    std::string domain_or_opaque;
86
    uint32_t ipv4;
87
    uint16_t ipv6[8];
88
89
5006
    ~Value() {}
90
5006
    Value() : ipv4(0) {}
91
  };
92
93
  Value value_;
94
  HostType type_ = HostType::H_FAILED;
95
96
12993
  // Destroys the string payload if one is active and returns the object to
  // the H_FAILED (empty) state. Safe to call in any state.
  void Reset() {
97
    // Alias needed so the explicit destructor call below can be spelled
    // `~string()`.
    using string = std::string;
98
12993
    switch (type_) {
99
3688
      case HostType::H_DOMAIN:
100
      case HostType::H_OPAQUE:
101
3688
        value_.domain_or_opaque.~string();
102
3688
        break;
103
9305
      default:
104
9305
        break;
105
    }
106
12993
    type_ = HostType::H_FAILED;
107
12993
  }
108
109
  // Setting the string members of the union with = is brittle because
110
  // it relies on them being initialized to a state that requires no
111
  // destruction of old data.
112
  // For a long time, that worked well enough because ParseIPv6Host() happens
113
  // to zero-fill `value_`, but that really is relying on standard library
114
  // internals too much.
115
  // These helpers are the easiest solution but we might want to consider
116
  // just not forcing strings into a union.
117
310
  // Replaces the current payload with an opaque-host string, constructed
  // in place inside the union.
  void SetOpaque(std::string&& string) {
118
310
    Reset();
119
310
    type_ = HostType::H_OPAQUE;
120
310
    new(&value_.domain_or_opaque) std::string(std::move(string));
121
310
  }
122
123
3378
  // Replaces the current payload with a domain string, constructed in place
  // inside the union.
  void SetDomain(std::string&& string) {
124
3378
    Reset();
125
3378
    type_ = HostType::H_DOMAIN;
126
3378
    new(&value_.domain_or_opaque) std::string(std::move(string));
127
3378
  }
128
};
129
130
5006
// Destructor: delegates to Reset(), which runs the std::string destructor
// when the union currently holds a domain or opaque host.
URLHost::~URLHost() {
131
5006
  Reset();
132
5006
}
133
134
// X-macro listing argument slots; it is expanded below into the enumerators
// of `url_cb_args`. ARG_COUNT is last so that its value equals the number of
// preceding entries.
#define ARGS(XX)                                                              \
135
  XX(ARG_FLAGS)                                                               \
136
  XX(ARG_PROTOCOL)                                                            \
137
  XX(ARG_USERNAME)                                                            \
138
  XX(ARG_PASSWORD)                                                            \
139
  XX(ARG_HOST)                                                                \
140
  XX(ARG_PORT)                                                                \
141
  XX(ARG_PATH)                                                                \
142
  XX(ARG_QUERY)                                                               \
143
  XX(ARG_FRAGMENT)                                                            \
144
  XX(ARG_COUNT)  // This one has to be last.
145
146
// Enumerators generated from ARGS, numbered from 0 in declaration order.
enum url_cb_args {
147
#define XX(name) name,
148
  ARGS(XX)
149
#undef XX
150
};
151
152
// Generates two overloads of a predicate called `name`:
//   name(ch1, ch2)          - evaluates `expr`, which may refer to ch1 / ch2.
//   name(std::basic_string) - true when the string has at least two
//                             characters and the first two satisfy the
//                             two-character overload.
// `bits` is the minimum character width; it is enforced with static_assert.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
153
  template <typename T>                                                       \
154
  bool name(const T ch1, const T ch2) {                                \
155
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
156
                  "Character must be wider than " #bits " bits");             \
157
    return (expr);                                                            \
158
  }                                                                           \
159
  template <typename T>                                                       \
160
  bool name(const std::basic_string<T>& str) {                         \
161
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
162
                  "Character must be wider than " #bits " bits");             \
163
    return str.length() >= 2 && name(str[0], str[1]);                         \
164
  }
165
166
// Single-character classification predicates.
// NOTE(review): CHAR_TEST is not defined in the visible part of this file;
// presumably it expands to a template `bool name(const T ch)` that returns
// the given expression, mirroring TWO_CHAR_STRING_TEST - confirm.

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
167

15268609
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
168
169
// https://infra.spec.whatwg.org/#c0-control
170

88219
CHAR_TEST(8, IsC0Control, (ch >= '\0' && ch <= '\x1f'))
171
172
// https://infra.spec.whatwg.org/#c0-control-or-space
173

318449
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
174
175
// https://infra.spec.whatwg.org/#ascii-digit
176

619006
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))
177
178
// https://infra.spec.whatwg.org/#ascii-hex-digit
179


1137
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
180
                               (ch >= 'A' && ch <= 'F') ||
181
                               (ch >= 'a' && ch <= 'f')))
182
183
// https://infra.spec.whatwg.org/#ascii-alpha
184


1370984
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
185
                            (ch >= 'a' && ch <= 'z')))
186
187
// https://infra.spec.whatwg.org/#ascii-alphanumeric
188

602367
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))
189
190
// https://infra.spec.whatwg.org/#ascii-lowercase
191
// Returns `ch` with bit 0x20 set when it is an ASCII letter (which maps
// 'A'..'Z' onto 'a'..'z' and leaves lowercase letters unchanged); any other
// value is returned as is.
template <typename T>
192
602408
T ASCIILowercase(T ch) {
193
602408
  return IsASCIIAlpha(ch) ? (ch | 0x20) : ch;
194
}
195
196
// https://url.spec.whatwg.org/#forbidden-host-code-point
// True for NUL, tab, LF, CR, space and the punctuation characters listed in
// the expression below.
197








90067
CHAR_TEST(8,
198
          IsForbiddenHostCodePoint,
199
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ' ||
200
              ch == '#' || ch == '/' || ch == ':' || ch == '?' || ch == '@' ||
201
              ch == '[' || ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
202
              ch == '^' || ch == '|')
203
204
// https://url.spec.whatwg.org/#forbidden-domain-code-point
// Superset of the forbidden host code points: additionally rejects every C0
// control character, '%' and DEL (0x7f).
205


88342
CHAR_TEST(8,
206
          IsForbiddenDomainCodePoint,
207
          IsForbiddenHostCodePoint(ch) || IsC0Control(ch) || ch == '%' ||
208
              ch == '\x7f')
209
210
// https://url.spec.whatwg.org/#windows-drive-letter
// An ASCII letter followed by ':' or '|'.
211

12774
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
212
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))
213
214
// https://url.spec.whatwg.org/#normalized-windows-drive-letter
// An ASCII letter followed by ':' only.
215

2790
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
216
                     (IsASCIIAlpha(ch1) && ch2 == ':'))
217
218
// The generator macro is not needed past this point.
#undef TWO_CHAR_STRING_TEST
219
220
12879932
// Tests bit `i` of the bit set stored in byte array `a`: byte index is
// i / 8 (i >> 3) and the bit within that byte is i % 8 (i & 7), least
// significant bit first. `a` must therefore hold at least (i / 8) + 1 bytes.
bool BitAt(const uint8_t a[], const uint8_t i) {
221
12879932
  return !!(a[i >> 3] & (1 << (i & 7)));
222
}
223
224
// Appends ch to str. If ch position in encode_set is set, the ch will
225
// be percent-encoded then appended.
226
12879932
// Appends `ch` to `*str`, percent-encoded when the bit for `ch` is set in
// `encode_set` (tested with BitAt), otherwise verbatim.
// The encoded form is read from the `hex` table at offset ch * 4, i.e. the
// table is laid out as consecutive 4-byte "%XX\0" entries.
void AppendOrEscape(std::string* str,
227
                    const unsigned char ch,
228
                    const uint8_t encode_set[]) {
229
12879932
  if (BitAt(encode_set, ch))
230
1315
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
231
  else
232
12878617
    *str += ch;
233
12879932
}
234
235
782
// Converts one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to
// its numeric value 0..15. Any other character reaches UNREACHABLE(), so
// callers must validate the character first (PercentDecode does so with
// IsASCIIHexDigit).
unsigned hex2bin(const char ch) {
236

782
  if (ch >= '0' && ch <= '9')
237
515
    return ch - '0';
238

267
  if (ch >= 'A' && ch <= 'F')
239
173
    return 10 + (ch - 'A');
240

94
  if (ch >= 'a' && ch <= 'f')
241
94
    return 10 + (ch - 'a');
242
  UNREACHABLE();
243
}
244
245
4339
// Returns a copy of input[0..len) in which every "%XY" sequence, where X and
// Y are ASCII hex digits, is replaced by the single byte 0xXY. A '%' that is
// not followed by two hex digits (or is too close to the end of the input)
// is copied through unchanged.
std::string PercentDecode(const char* input, size_t len) {
246
4339
  std::string dest;
247
4339
  if (len == 0)
248
2
    return dest;
249
4337
  // The decoded output is never longer than the input.
  dest.reserve(len);
250
4337
  const char* pointer = input;
251
4337
  const char* end = input + len;
252
253
94076
  while (pointer < end) {
254
89739
    const char ch = pointer[0];
255
89739
    // Number of characters after the current one.
    size_t remaining = end - pointer - 1;
256


90139
    // `remaining < 2` is checked before pointer[1] / pointer[2] are read, so
    // the short-circuit evaluation keeps the look-ahead inside the input.
    if (ch != '%' || remaining < 2 ||
257
400
        (ch == '%' &&
258
400
         (!IsASCIIHexDigit(pointer[1]) ||
259
397
          !IsASCIIHexDigit(pointer[2])))) {
260
89348
      dest += ch;
261
89348
      pointer++;
262
89348
      continue;
263
    } else {
264
391
      unsigned a = hex2bin(pointer[1]);
265
391
      unsigned b = hex2bin(pointer[2]);
266
391
      char c = static_cast<char>(a * 16 + b);
267
391
      dest += c;
268
391
      // Skip the '%' and both hex digits.
      pointer += 3;
269
    }
270
  }
271
4337
  return dest;
272
}
273
274
// X-macro table of the special URL schemes. Each entry is
// XX(identifier, default port, scheme string including the trailing ':').
// "file:" has no default port, which is encoded as -1.
#define SPECIALS(XX)                                                          \
275
  XX(ftp, 21, "ftp:")                                                         \
276
  XX(file, -1, "file:")                                                       \
277
  XX(http, 80, "http:")                                                       \
278
  XX(https, 443, "https:")                                                    \
279
  XX(ws, 80, "ws:")                                                           \
280
  XX(wss, 443, "wss:")
281
282
364701
// Returns true when `scheme` is exactly one of the scheme strings listed in
// SPECIALS (the comparison includes the trailing ':').
bool IsSpecial(const std::string& scheme) {
283
// Expands to one `if (scheme == "...") return true;` per SPECIALS entry.
#define V(_, __, name) if (scheme == name) return true;
284



364701
  SPECIALS(V);
285
#undef V
286
6678
  return false;
287
}
288
289
152780
// Returns the Environment string accessor result
// (env->url_special_<key>_string()) matching `scheme`.
// `scheme` must be one of the SPECIALS scheme strings; anything else reaches
// UNREACHABLE(), so callers are expected to check IsSpecial() first.
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
290
#define V(key, _, name) if (scheme == name)                                  \
291
    return env->url_special_##key##_string();
292



152780
  SPECIALS(V)
293
#undef V
294
  UNREACHABLE();
295
}
296
297
145590
// Returns -1 when `p` is the default port listed in SPECIALS for `scheme`,
// otherwise returns `p` unchanged. Because the "file:" entry uses -1 as its
// port, a -1 input for "file:" also yields -1.
int NormalizePort(const std::string& scheme, int p) {
298
#define V(_, port, name) if (scheme == name && p == port) return -1;
299









145590
  SPECIALS(V);
300
#undef V
301
11701
  return p;
302
}
303
304
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
305
6974
// Returns true when the range [p, end) begins with a Windows drive letter
// (see IsWindowsDriveLetter) and that prefix is either the whole range or is
// immediately followed by '/', '\\', '?' or '#'.
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
306
6974
  size_t length = end - p;
307
6259
  return length >= 2 &&
308

13251
    IsWindowsDriveLetter(p[0], p[1]) &&
309
18
    // p[2] is only read when length > 2 thanks to short-circuit evaluation.
    (length == 2 ||
310
18
      p[2] == '/' ||
311
7
      p[2] == '\\' ||
312
3
      p[2] == '?' ||
313
6976
      p[2] == '#');
314
}
315
316
// ToUnicode() / ToASCII(): domain conversion helpers. Both return false on
// failure and write the converted text to `*output` on success. `output` may
// alias `input` in the callers of this file (ParseHost passes the same
// string for both); the i18n-backed versions write into a temporary buffer
// first and assign at the end.
#if defined(NODE_HAVE_I18N_SUPPORT)
317
195
// Converts `input` with i18n::ToUnicode; a negative result means failure.
bool ToUnicode(const std::string& input, std::string* output) {
318
390
  MaybeStackBuffer<char> buf;
319
195
  if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0)
320
    return false;
321
195
  output->assign(*buf, buf.length());
322
195
  return true;
323
}
324
325
4326
// Converts `input` with i18n::ToASCII; fails on a negative result and also
// when the conversion produced an empty string.
bool ToASCII(const std::string& input, std::string* output) {
326
8652
  MaybeStackBuffer<char> buf;
327
4326
  if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0)
328
133
    return false;
329
4193
  if (buf.length() == 0)
330
22
    return false;
331
4171
  output->assign(*buf, buf.length());
332
4171
  return true;
333
}
334
#else  // !defined(NODE_HAVE_I18N_SUPPORT)
335
// Intentional non-ops if ICU is not present.
336
// Copies `input` to `*output` unchanged and always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
337
  *output = input;
338
  return true;
339
}
340
341
// Copies `input` to `*output` unchanged and always succeeds.
bool ToASCII(const std::string& input, std::string* output) {
342
  *output = input;
343
  return true;
344
}
345
#endif  // !defined(NODE_HAVE_I18N_SUPPORT)
346
347
// Size in bytes of a binary IPv6 address.
#define NS_IN6ADDRSZ 16
348
349
323
// Parses input[0..length) (the text between the brackets of an IPv6 literal)
// with uv_inet_pton(AF_INET6, ...). On success stores the eight 16-bit
// pieces in value_.ipv6 and sets type_ to H_IPV6; on failure the host is
// left in the H_FAILED state.
void URLHost::ParseIPv6Host(const char* input, size_t length) {
350
323
  CHECK_EQ(type_, HostType::H_FAILED);
351
352
  unsigned char buf[sizeof(struct in6_addr)];
353
323
  // uv_inet_pton() takes a NUL-terminated string, so the input range is
  // copied into a buffer with one extra byte for the terminator.
  MaybeStackBuffer<char> ipv6(length + 1);
354
323
  *(*ipv6 + length) = 0;
355
323
  memset(buf, 0, sizeof(buf));
356
323
  memcpy(*ipv6, input, sizeof(const char) * length);
357
358
323
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
359
360
323
  if (ret != 0) {
361
64
    return;
362
  }
363
364
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
365
2331
  // Combine each pair of bytes (high byte first) into one 16-bit piece.
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
366
2072
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
367
  }
368
369
259
  type_ = HostType::H_IPV6;
370
}
371
372
// https://url.spec.whatwg.org/#ipv4-number-parser
373
4989
// Parses the character range [start, end) as one IPv4 address part.
//
// Radix selection:
//   - "0x" / "0X" prefix           -> hexadecimal
//   - leading '0' and length >= 2   -> octal
//   - otherwise                     -> decimal
//
// Returns:
//   -1        when the range is empty or contains a character that is not a
//             digit of the selected radix;
//   0         when nothing is left after the radix prefix (e.g. "0x");
//   the value otherwise, saturated at INT64_MAX when it does not fit (the
//   callers only need to know that such a value is "too large").
//
// Only characters inside [start, end) are read; the range does not have to
// be followed by a NUL or any other delimiter.
int64_t ParseIPv4Number(const char* start, const char* end) {
  if (start == end) return -1;

  int64_t radix = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    radix = 16;
  } else if (end - start >= 2 && start[0] == '0') {
    start++;
    radix = 8;
  }

  if (start == end) return 0;

  int64_t value = 0;
  for (const char* p = start; p < end; p++) {
    const char ch = *p;
    int64_t digit = -1;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'A' && ch <= 'F')
      digit = 10 + (ch - 'A');
    else if (ch >= 'a' && ch <= 'f')
      digit = 10 + (ch - 'a');

    // Rejects non-digits as well as digits too large for the radix
    // (e.g. '8' in octal, 'a' in decimal).
    if (digit < 0 || digit >= radix)
      return -1;

    // Saturate instead of overflowing. Once saturated the value stays at
    // INT64_MAX, while the remaining characters are still validated.
    if (value > (INT64_MAX - digit) / radix)
      value = INT64_MAX;
    else
      value = value * radix + digit;
  }
  return value;
}
409
410
// https://url.spec.whatwg.org/#ends-in-a-number-checker
411
3838
bool EndsInANumber(const std::string& input) {
412
7676
  std::vector<std::string> parts = SplitString(input, '.', false);
413
414
3838
  if (parts.empty()) return false;
415
416
3838
  if (parts.back().empty()) {
417
49
    if (parts.size() == 1) return false;
418
49
    parts.pop_back();
419
  }
420
421
3838
  const std::string& last = parts.back();
422
423
  // If last is non-empty and contains only ASCII digits, then return true
424

3838
  if (!last.empty() && std::all_of(last.begin(), last.end(), ::isdigit)) {
425
430
    return true;
426
  }
427
428
3408
  const char* last_str = last.c_str();
429
3408
  int64_t num = ParseIPv4Number(last_str, last_str + last.size());
430
3408
  if (num >= 0) return true;
431
432
3378
  return false;
433
}
434
435
460
void URLHost::ParseIPv4Host(const char* input, size_t length) {
436
460
  CHECK_EQ(type_, HostType::H_FAILED);
437
460
  const char* pointer = input;
438
460
  const char* mark = input;
439
460
  const char* end = pointer + length;
440
460
  int parts = 0;
441
460
  uint32_t val = 0;
442
  uint64_t numbers[4];
443
460
  int tooBigNumbers = 0;
444
460
  if (length == 0)
445
108
    return;
446
447
4834
  while (pointer <= end) {
448
4466
    const char ch = pointer < end ? pointer[0] : kEOL;
449
4466
    int64_t remaining = end - pointer - 1;
450

4466
    if (ch == '.' || ch == kEOL) {
451
1611
      if (++parts > static_cast<int>(arraysize(numbers))) return;
452
1587
      if (pointer == mark)
453
6
        return;
454
1581
      int64_t n = ParseIPv4Number(mark, pointer);
455
1581
      if (n < 0)
456
51
        return;
457
458
1530
      if (n > 255) {
459
87
        tooBigNumbers++;
460
      }
461
1530
      numbers[parts - 1] = n;
462
1530
      mark = pointer + 1;
463

1530
      if (ch == '.' && remaining == 0)
464
11
        break;
465
    }
466
4374
    pointer++;
467
  }
468
379
  CHECK_GT(parts, 0);
469
470
  // If any but the last item in numbers is greater than 255, return failure.
471
  // If the last item in numbers is greater than or equal to
472
  // 256^(5 - the number of items in numbers), return failure.
473
376
  if (tooBigNumbers > 1 ||
474

797
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
475
367
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
476
27
    return;
477
  }
478
479
352
  type_ = HostType::H_IPV4;
480
352
  val = static_cast<uint32_t>(numbers[parts - 1]);
481
1329
  for (int n = 0; n < parts - 1; n++) {
482
977
    double b = 3 - n;
483
977
    val +=
484
977
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
485
  }
486
487
352
  value_.ipv4 = val;
488
}
489
490
354
// Parses input[0..length) as an opaque host (used by ParseHost for
// non-special schemes). Fails, leaving the host in the H_FAILED state, as
// soon as a forbidden host code point is found; otherwise stores the input
// with characters from C0_CONTROL_ENCODE_SET percent-encoded.
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
491
354
  CHECK_EQ(type_, HostType::H_FAILED);
492
354
  std::string output;
493
354
  output.reserve(length);
494
2035
  for (size_t i = 0; i < length; i++) {
495
1725
    const char ch = input[i];
496
1725
    if (IsForbiddenHostCodePoint(ch)) {
497
44
      return;
498
    } else {
499
1681
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
500
    }
501
  }
502
503
310
  SetOpaque(std::move(output));
504
}
505
506
5006
// Top-level host parser. Dispatches on the shape of input[0..length):
//   - empty input                      -> failure
//   - "[...]"                          -> ParseIPv6Host on the bracket contents
//   - non-special scheme               -> ParseOpaqueHost
//   - otherwise: percent-decode, ToASCII, reject forbidden domain code
//     points, then either ParseIPv4Host (when the result ends in a number)
//     or store it as a domain (after ToUnicode when `unicode` is set).
// Failure is reported by leaving the host in the H_FAILED state.
void URLHost::ParseHost(const char* input,
507
                        size_t length,
508
                        bool is_special,
509
                        bool unicode) {
510
5006
  CHECK_EQ(type_, HostType::H_FAILED);
511
5006
  const char* pointer = input;
512
513
5006
  if (length == 0)
514
1628
    return;
515
516
5006
  if (pointer[0] == '[') {
517
326
    if (pointer[length - 1] != ']')
518
3
      return;
519
323
    // Skip the opening bracket and drop both brackets from the length.
    return ParseIPv6Host(++pointer, length - 2);
520
  }
521
522
4680
  if (!is_special)
523
354
    return ParseOpaqueHost(input, length);
524
525
  // First, we have to percent decode
526
4326
  std::string decoded = PercentDecode(input, length);
527
528
  // Then we have to punycode toASCII
529
4326
  if (!ToASCII(decoded, &decoded))
530
155
    return;
531
532
  // If any of the following characters are still present, we have to fail
533
92180
  for (size_t n = 0; n < decoded.size(); n++) {
534
88342
    const char ch = decoded[n];
535
88342
    if (IsForbiddenDomainCodePoint(ch)) {
536
333
      return;
537
    }
538
  }
539
540
  // If domain ends in a number, then return the result of IPv4 parsing domain
541
3838
  if (EndsInANumber(decoded)) {
542
460
    return ParseIPv4Host(decoded.c_str(), decoded.length());
543
  }
544
545
  // If the unicode flag is set, run the result through punycode ToUnicode
546

3378
  if (unicode && !ToUnicode(decoded, &decoded))
547
    return;
548
549
  // It's not an IPv4 or IPv6 address, it must be a domain
550
3378
  SetDomain(std::move(decoded));
551
}
552
553
// Locates the longest sequence of 0 segments in an IPv6 address
554
// in order to use the :: compression when serializing
555
// Scans values[0..len) and returns a pointer to the first element of the
// longest run of consecutive zero elements, or nullptr when no run is longer
// than one element (`longest` starts at 1 and runs are compared with `>`).
// When several runs share the maximum length, the earliest one is returned.
template <typename T>
556
259
T* FindLongestZeroSequence(T* values, size_t len) {
557
259
  T* start = values;
558
259
  T* end = start + len;
559
259
  T* result = nullptr;
560
561
259
  // Start of the zero run currently being counted (nullptr outside a run).
  T* current = nullptr;
562
259
  unsigned counter = 0, longest = 1;
563
564
2331
  while (start < end) {
565
2072
    if (*start == 0) {
566
1787
      if (current == nullptr)
567
266
        current = start;
568
1787
      counter++;
569
    } else {
570
285
      // A non-zero element closes the current run.
      if (counter > longest) {
571
255
        longest = counter;
572
255
        result = current;
573
      }
574
285
      counter = 0;
575
285
      current = nullptr;
576
    }
577
2072
    start++;
578
  }
579
259
  // Account for a run that extends to the end of the array.
  if (counter > longest)
580
3
    result = current;
581
259
  return result;
582
}
583
584
4299
// Returns the serialized host like ToString(), but moves the stored string
// out for domain / opaque hosts instead of copying it. The host is reset to
// the H_FAILED state afterwards, so it must not be queried again.
std::string URLHost::ToStringMove() {
585
4299
  std::string return_value;
586
4299
  switch (type_) {
587
3688
    case HostType::H_DOMAIN:
588
    case HostType::H_OPAQUE:
589
3688
      return_value = std::move(value_.domain_or_opaque);
590
3688
      break;
591
611
    default:
592
611
      return_value = ToString();
593
611
      break;
594
  }
595
4299
  // Reset() still runs the destructor of the moved-from string.
  Reset();
596
4299
  return return_value;
597
}
598
599
611
// Serializes the host:
//   H_DOMAIN / H_OPAQUE -> a copy of the stored string
//   H_IPV4              -> dotted decimal, e.g. "192.168.0.1"
//   H_IPV6              -> bracketed lowercase hex, with the longest run of
//                          zero pieces (if longer than one) written as "::"
//   H_FAILED            -> empty string
std::string URLHost::ToString() const {
600
1222
  std::string dest;
601

611
  switch (type_) {
602
    case HostType::H_DOMAIN:
603
    case HostType::H_OPAQUE:
604
      return value_.domain_or_opaque;
605
352
    case HostType::H_IPV4: {
606
352
      // Longest form is "255.255.255.255" (15 characters).
      dest.reserve(15);
607
352
      uint32_t value = value_.ipv4;
608
1760
      // Emit the bytes from least to most significant, prepending each one
      // so that the most significant byte ends up first.
      for (int n = 0; n < 4; n++) {
609
1408
        dest.insert(0, std::to_string(value % 256));
610
1408
        if (n < 3)
611
1056
          dest.insert(0, 1, '.');
612
1408
        value /= 256;
613
      }
614
352
      break;
615
    }
616
259
    case HostType::H_IPV6: {
617
259
      // Longest form: '[' + 8 * 4 hex digits + 7 ':' + ']' = 41 characters.
      dest.reserve(41);
618
259
      dest += '[';
619
259
      const uint16_t* start = &value_.ipv6[0];
620
      // First piece of the zero run to compress, or nullptr if none.
      const uint16_t* compress_pointer =
621
259
          FindLongestZeroSequence(start, 8);
622
259
      // True while skipping the zero pieces of the compressed run.
      bool ignore0 = false;
623
2331
      for (int n = 0; n <= 7; n++) {
624
2072
        const uint16_t* piece = &value_.ipv6[n];
625

2072
        if (ignore0 && *piece == 0)
626
1777
          continue;
627
552
        else if (ignore0)
628
254
          ignore0 = false;
629
552
        if (compress_pointer == piece) {
630
257
          // A preceding piece has already written one ':'; at the very start
          // both colons have to be written here.
          dest += n == 0 ? "::" : ":";
631
257
          ignore0 = true;
632
257
          continue;
633
        }
634
        // Up to four hex digits plus the terminating NUL.
        char buf[5];
635
295
        snprintf(buf, sizeof(buf), "%x", *piece);
636
295
        dest += buf;
637
295
        if (n < 7)
638
39
          dest += ':';
639
      }
640
259
      dest += ']';
641
259
      break;
642
    }
643
    case HostType::H_FAILED:
644
      break;
645
  }
646
611
  return dest;
647
}
648
649
4616
// Convenience wrapper around URLHost: parses `input` and writes the
// serialized host to `*output`.
// Returns true on success. An empty `input` is treated as success and
// clears `*output`. Returns false (leaving `*output` untouched) when the
// host fails to parse.
bool ParseHost(const std::string& input,
650
               std::string* output,
651
               bool is_special,
652
               bool unicode = false) {
653
4616
  if (input.empty()) {
654
48
    output->clear();
655
48
    return true;
656
  }
657
9136
  URLHost host;
658
4568
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
659
4568
  if (host.ParsingFailed())
660
681
    return false;
661
3887
  // `host` is a local that is discarded right after, so moving out of it
  // is fine.
  *output = host.ToStringMove();
662
3887
  return true;
663
}
664
665
8519
// Converts a JS array into a vector of UTF-8 strings. Elements that are not
// strings are skipped, so the result can be shorter than the array.
// Element reads use ToLocalChecked(), i.e. a failing Get() is not handled
// here.
std::vector<std::string> FromJSStringArray(Environment* env,
666
                                           Local<Array> array) {
667
8519
  std::vector<std::string> vec;
668
8519
  if (array->Length() > 0)
669
8511
    vec.reserve(array->Length());
670
135968
  for (size_t n = 0; n < array->Length(); n++) {
671
118930
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
672
118930
    if (val->IsString()) {
673
59465
      Utf8Value value(env->isolate(), val.As<String>());
674
59465
      vec.emplace_back(*value, value.length());
675
    }
676
  }
677
8519
  return vec;
678
}
679
680
8519
// Builds a url_data from the properties of the JS object `base_obj`:
// flags, port, scheme, username, password, host, query, fragment and path.
// Properties of an unexpected type are skipped, except `scheme`, which is
// always converted with Utf8Value. Every property read uses
// ToLocalChecked().
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
681
8519
  url_data base;
682
8519
  Local<Context> context = env->context();
683
684
  Local<Value> flags =
685
25557
      base_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
686
8519
  if (flags->IsInt32())
687
17038
    base.flags = flags->Int32Value(context).FromJust();
688
689
  Local<Value> port =
690
25557
      base_obj->Get(env->context(), env->port_string()).ToLocalChecked();
691
8519
  if (port->IsInt32())
692
72
    base.port = port->Int32Value(context).FromJust();
693
694
  Local<Value> scheme =
695
17038
      base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
696
8519
  base.scheme = Utf8Value(env->isolate(), scheme).out();
697
698
  // Reads the string property `name` into base.*member. The matching `flag`
  // bit is set when the property is a string and either `empty_as_present`
  // is true or the string is non-empty.
  auto GetStr = [&](std::string url_data::*member,
699
                    int flag,
700
                    Local<String> name,
701
42595
                    bool empty_as_present) {
702
85190
    Local<Value> value = base_obj->Get(env->context(), name).ToLocalChecked();
703
85190
    if (value->IsString()) {
704
49554
      Utf8Value utf8value(env->isolate(), value.As<String>());
705
24777
      (base.*member).assign(*utf8value, utf8value.length());
706

41815
      if (empty_as_present || value.As<String>()->Length() != 0) {
707
7751
        base.flags |= flag;
708
      }
709
    }
710
51114
  };
711
8519
  // Empty username / password do not count as present; an empty host, query
  // or fragment does.
  GetStr(&url_data::username,
712
         URL_FLAGS_HAS_USERNAME,
713
         env->username_string(),
714
         false);
715
8519
  GetStr(&url_data::password,
716
         URL_FLAGS_HAS_PASSWORD,
717
         env->password_string(),
718
         false);
719
8519
  GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
720
8519
  GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
721
8519
  GetStr(&url_data::fragment,
722
         URL_FLAGS_HAS_FRAGMENT,
723
         env->fragment_string(),
724
         true);
725
726
  Local<Value>
727
25557
      path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked();
728
8519
  if (path->IsArray()) {
729
8519
    base.flags |= URL_FLAGS_HAS_PATH;
730
8519
    base.path = FromJSStringArray(env, path.As<Array>());
731
  }
732
8519
  return base;
733
}
734
735
46200
// Builds a url_data from the JS object `context_obj`, copying only a subset
// of its state: the flag bits in kCopyFlagsMask, scheme, port, host, and
// username / password when the corresponding flag bit is set.
// Every property read uses ToLocalChecked().
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
736
46200
  url_data context;
737
  Local<Value> flags =
738
138600
      context_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
739
46200
  if (flags->IsInt32()) {
740
    // Only these flag bits are carried over from the JS object.
    static constexpr int32_t kCopyFlagsMask =
741
        URL_FLAGS_SPECIAL |
742
        URL_FLAGS_CANNOT_BE_BASE |
743
        URL_FLAGS_HAS_USERNAME |
744
        URL_FLAGS_HAS_PASSWORD |
745
        URL_FLAGS_HAS_HOST;
746
46200
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
747
  }
748
  Local<Value> scheme =
749
138600
      context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
750
92400
  if (scheme->IsString()) {
751
92400
    Utf8Value value(env->isolate(), scheme);
752
46200
    context.scheme.assign(*value, value.length());
753
  }
754
  Local<Value> port =
755
138600
      context_obj->Get(env->context(), env->port_string()).ToLocalChecked();
756
46200
  if (port->IsInt32())
757
228
    context.port = port.As<Int32>()->Value();
758
46200
  // When the username flag is set the property is required to be a string.
  if (context.flags & URL_FLAGS_HAS_USERNAME) {
759
    Local<Value> username =
760
214
        context_obj->Get(env->context(),
761
642
                         env->username_string()).ToLocalChecked();
762
428
    CHECK(username->IsString());
763
428
    Utf8Value value(env->isolate(), username);
764
214
    context.username.assign(*value, value.length());
765
  }
766
46200
  // Same requirement for the password.
  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
767
    Local<Value> password =
768
208
        context_obj->Get(env->context(),
769
624
                         env->password_string()).ToLocalChecked();
770
416
    CHECK(password->IsString());
771
416
    Utf8Value value(env->isolate(), password);
772
208
    context.password.assign(*value, value.length());
773
  }
774
  Local<Value> host =
775
46200
      context_obj->Get(env->context(),
776
138600
                       env->host_string()).ToLocalChecked();
777
92400
  if (host->IsString()) {
778
92362
    Utf8Value value(env->isolate(), host);
779
46181
    context.host.assign(*value, value.length());
780
  }
781
46200
  return context;
782
}
783
784
// Single dot segment can be ".", "%2e", or "%2E"
785
2687221
// Returns true when `str` is "." or its percent-encoded form "%2e" / "%2E".
// Dispatching on the length first rejects every other string without
// comparing characters.
bool IsSingleDotSegment(const std::string& str) {
786
2687221
  switch (str.size()) {
787
7335
    case 1:
788
7335
      return str == ".";
789
149568
    case 3:
790
149568
      return str[0] == '%' &&
791

149594
             str[1] == '2' &&
792
149594
             ASCIILowercase(str[2]) == 'e';
793
2530318
    default:
794
2530318
      return false;
795
  }
796
}
797
798
// Double dot segment can be:
799
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
800
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
801
1348594
// Returns true when `str` is ".." with either dot optionally written as
// "%2e" / "%2E". The possible lengths are 2 (".."), 4 (one dot encoded) and
// 6 (both dots encoded); any other length is rejected immediately.
bool IsDoubleDotSegment(const std::string& str) {
802

1348594
  switch (str.size()) {
803
5107
    case 2:
804
5107
      return str == "..";
805
384697
    case 4:
806

384697
      // Quick reject: a match has to start with '.' or '%'.
      if (str[0] != '.' && str[0] != '%')
807
384685
        return false;
808
12
      // Either ".%2e" or "%2e." (hex letter in either case).
      return ((str[0] == '.' &&
809
9
               str[1] == '%' &&
810

4
               str[2] == '2' &&
811
26
               ASCIILowercase(str[3]) == 'e') ||
812
10
              (str[0] == '%' &&
813

6
               str[1] == '2' &&
814
3
               ASCIILowercase(str[2]) == 'e' &&
815
15
               str[3] == '.'));
816
78318
    case 6:
817
78318
      // "%2e%2e" (hex letters in either case).
      return (str[0] == '%' &&
818

16
              str[1] == '2' &&
819
8
              ASCIILowercase(str[2]) == 'e' &&
820
2
              str[3] == '%' &&
821

78328
              str[4] == '2' &&
822
78320
              ASCIILowercase(str[5]) == 'e');
823
880472
    default:
824
880472
      return false;
825
  }
826
}
827
828
11576
// Removes the last segment of url->path. Nothing is removed when the path is
// empty, or when the scheme is "file:" and the only segment is a normalized
// Windows drive letter (e.g. "C:").
void ShortenUrlPath(struct url_data* url) {
829
11576
  if (url->path.empty()) return;
830


11831
  if (url->path.size() == 1 && url->scheme == "file:" &&
831
621
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
832
11210
  url->path.pop_back();
833
}
834
835
}  // anonymous namespace
836
837
242773
void URL::Parse(const char* input,
838
                size_t len,
839
                enum url_parse_state state_override,
840
                struct url_data* url,
841
                bool has_url,
842
                const struct url_data* base,
843
                bool has_base) {
844
242773
  const char* p = input;
845
242773
  const char* end = input + len;
846
847
242773
  if (!has_url) {
848
159238
    for (const char* ptr = p; ptr < end; ptr++) {
849
159226
      if (IsC0ControlOrSpace(*ptr))
850
28
        p++;
851
      else
852
159198
        break;
853
    }
854
159235
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
855
159223
      if (IsC0ControlOrSpace(*ptr))
856
25
        end--;
857
      else
858
159198
        break;
859
    }
860
159210
    input = p;
861
159210
    len = end - p;
862
  }
863
864
  // The spec says we should strip out any ASCII tabs or newlines.
865
  // In those cases, we create another std::string instance with the filtered
866
  // contents, but in the general case we avoid the overhead.
867
242773
  std::string whitespace_stripped;
868
15510677
  for (const char* ptr = p; ptr < end; ptr++) {
869
15268052
    if (!IsASCIITabOrNewline(*ptr))
870
15267904
      continue;
871
    // Hit tab or newline. Allocate storage, copy what we have until now,
872
    // and then iterate and filter all similar characters out.
873
148
    whitespace_stripped.reserve(len - 1);
874
148
    whitespace_stripped.assign(p, ptr - p);
875
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
876
705
    for (ptr = ptr + 1; ptr < end; ptr++) {
877
557
      if (!IsASCIITabOrNewline(*ptr))
878
515
        whitespace_stripped += *ptr;
879
    }
880
881
    // Update variables like they should have looked like if the string
882
    // had been stripped of whitespace to begin with.
883
148
    input = whitespace_stripped.c_str();
884
148
    len = whitespace_stripped.size();
885
148
    p = input;
886
148
    end = input + len;
887
148
    break;
888
  }
889
890
242773
  bool atflag = false;  // Set when @ has been seen.
891
242773
  bool square_bracket_flag = false;  // Set inside of [...]
892
242773
  bool password_token_seen_flag = false;  // Set after a : after an username.
893
894
242773
  std::string buffer;
895
896
  // Set the initial parse state.
897
242773
  const bool has_state_override = state_override != kUnknownState;
898
242773
  enum url_parse_state state = has_state_override ? state_override :
899
                                                    kSchemeStart;
900
901

242773
  if (state < kSchemeStart || state > kFragment) {
902
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
903
    return;
904
  }
905
906
16065148
  while (p <= end) {
907
15831457
    const char ch = p < end ? p[0] : kEOL;
908
15831457
    bool special = (url->flags & URL_FLAGS_SPECIAL);
909
    bool cannot_be_base;
910

15831457
    bool special_back_slash = (special && ch == '\\');
911
912





15831457
    switch (state) {
913
159265
      case kSchemeStart:
914
159265
        if (IsASCIIAlpha(ch)) {
915
145895
          buffer += ASCIILowercase(ch);
916
145895
          state = kScheme;
917
13370
        } else if (!has_state_override) {
918
13363
          state = kNoScheme;
919
13363
          continue;
920
        } else {
921
7
          url->flags |= URL_FLAGS_FAILED;
922
7
          return;
923
        }
924
145895
        break;
925
602367
      case kScheme:
926


602367
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
927
456472
          buffer += ASCIILowercase(ch);
928

145895
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
929

143839
          if (has_state_override && buffer.size() == 0) {
930
            url->flags |= URL_FLAGS_TERMINATED;
931
            return;
932
          }
933
143839
          buffer += ':';
934
935
143839
          bool new_is_special = IsSpecial(buffer);
936
937
143839
          if (has_state_override) {
938
32
            if ((special != new_is_special) ||
939
32
                ((buffer == "file:") &&
940
3
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
941
1
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
942


77
                  (url->port != -1))) ||
943
32
                  (url->scheme == "file:" && url->host.empty())) {
944
16
              url->flags |= URL_FLAGS_TERMINATED;
945
16
              return;
946
            }
947
          }
948
949
143823
          url->scheme = std::move(buffer);
950
143823
          url->port = NormalizePort(url->scheme, url->port);
951
143823
          if (new_is_special) {
952
137581
            url->flags |= URL_FLAGS_SPECIAL;
953
137581
            special = true;
954
          } else {
955
6242
            url->flags &= ~URL_FLAGS_SPECIAL;
956
6242
            special = false;
957
          }
958
          // `special_back_slash` equals to `(special && ch == '\\')` and `ch`
959
          // here always not equals to `\\`. So `special_back_slash` here always
960
          // equals to `false`.
961
143823
          special_back_slash = false;
962
143823
          buffer.clear();
963
143823
          if (has_state_override)
964
26
            return;
965
143797
          if (url->scheme == "file:") {
966
133649
            state = kFile;
967
3917
          } else if (special &&
968

14065
                     has_base &&
969
777
                     url->scheme == base->scheme) {
970
204
            state = kSpecialRelativeOrAuthority;
971
9944
          } else if (special) {
972
3713
            state = kSpecialAuthoritySlashes;
973

6231
          } else if (p + 1 < end && p[1] == '/') {
974
442
            state = kPathOrAuthority;
975
442
            p++;
976
          } else {
977
5789
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
978
5789
            url->flags |= URL_FLAGS_HAS_PATH;
979
5789
            url->path.emplace_back("");
980
5789
            state = kCannotBeBase;
981
143797
          }
982
2056
        } else if (!has_state_override) {
983
2050
          buffer.clear();
984
2050
          state = kNoScheme;
985
2050
          p = input;
986
2050
          continue;
987
        } else {
988
6
          url->flags |= URL_FLAGS_FAILED;
989
6
          return;
990
        }
991
600269
        break;
992
15413
      case kNoScheme:
993

15413
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
994

15413
        if (!has_base || (cannot_be_base && ch != '#')) {
995
8027
          url->flags |= URL_FLAGS_FAILED;
996
8027
          return;
997

7386
        } else if (cannot_be_base && ch == '#') {
998
14
          url->scheme = base->scheme;
999
14
          if (IsSpecial(url->scheme)) {
1000
            url->flags |= URL_FLAGS_SPECIAL;
1001
            special = true;
1002
          } else {
1003
14
            url->flags &= ~URL_FLAGS_SPECIAL;
1004
14
            special = false;
1005
          }
1006

14
          special_back_slash = (special && ch == '\\');
1007
14
          if (base->flags & URL_FLAGS_HAS_PATH) {
1008
14
            url->flags |= URL_FLAGS_HAS_PATH;
1009
14
            url->path = base->path;
1010
          }
1011
14
          if (base->flags & URL_FLAGS_HAS_QUERY) {
1012
2
            url->flags |= URL_FLAGS_HAS_QUERY;
1013
2
            url->query = base->query;
1014
          }
1015
14
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1016
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1017
            url->fragment = base->fragment;
1018
          }
1019
14
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1020
14
          state = kFragment;
1021

14744
        } else if (has_base &&
1022
7372
                   base->scheme != "file:") {
1023
392
          state = kRelative;
1024
392
          continue;
1025
        } else {
1026
6980
          url->scheme = "file:";
1027
6980
          url->flags |= URL_FLAGS_SPECIAL;
1028
6980
          special = true;
1029
6980
          state = kFile;
1030

6980
          special_back_slash = (special && ch == '\\');
1031
6980
          continue;
1032
        }
1033
14
        break;
1034
204
      case kSpecialRelativeOrAuthority:
1035

204
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1036
188
          state = kSpecialAuthorityIgnoreSlashes;
1037
188
          p++;
1038
        } else {
1039
16
          state = kRelative;
1040
16
          continue;
1041
        }
1042
188
        break;
1043
442
      case kPathOrAuthority:
1044
442
        if (ch == '/') {
1045
357
          state = kAuthority;
1046
        } else {
1047
85
          state = kPath;
1048
85
          continue;
1049
        }
1050
357
        break;
1051
408
      case kRelative:
1052
408
        url->scheme = base->scheme;
1053
408
        if (IsSpecial(url->scheme)) {
1054
358
          url->flags |= URL_FLAGS_SPECIAL;
1055
358
          special = true;
1056
        } else {
1057
50
          url->flags &= ~URL_FLAGS_SPECIAL;
1058
50
          special = false;
1059
        }
1060

408
        special_back_slash = (special && ch == '\\');
1061

408
        switch (ch) {
1062
9
          case kEOL:
1063
9
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1064
2
              url->flags |= URL_FLAGS_HAS_USERNAME;
1065
2
              url->username = base->username;
1066
            }
1067
9
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1068
2
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1069
2
              url->password = base->password;
1070
            }
1071
9
            if (base->flags & URL_FLAGS_HAS_HOST) {
1072
8
              url->flags |= URL_FLAGS_HAS_HOST;
1073
8
              url->host = base->host;
1074
            }
1075
9
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1076
              url->flags |= URL_FLAGS_HAS_QUERY;
1077
              url->query = base->query;
1078
            }
1079
9
            if (base->flags & URL_FLAGS_HAS_PATH) {
1080
9
              url->flags |= URL_FLAGS_HAS_PATH;
1081
9
              url->path = base->path;
1082
            }
1083
9
            url->port = base->port;
1084
9
            break;
1085
237
          case '/':
1086
237
            state = kRelativeSlash;
1087
237
            break;
1088
24
          case '?':
1089
24
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1090
              url->flags |= URL_FLAGS_HAS_USERNAME;
1091
              url->username = base->username;
1092
            }
1093
24
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1094
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1095
              url->password = base->password;
1096
            }
1097
24
            if (base->flags & URL_FLAGS_HAS_HOST) {
1098
22
              url->flags |= URL_FLAGS_HAS_HOST;
1099
22
              url->host = base->host;
1100
            }
1101
24
            if (base->flags & URL_FLAGS_HAS_PATH) {
1102
24
              url->flags |= URL_FLAGS_HAS_PATH;
1103
24
              url->path = base->path;
1104
            }
1105
24
            url->port = base->port;
1106
24
            state = kQuery;
1107
24
            break;
1108
19
          case '#':
1109
19
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1110
              url->flags |= URL_FLAGS_HAS_USERNAME;
1111
              url->username = base->username;
1112
            }
1113
19
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1114
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1115
              url->password = base->password;
1116
            }
1117
19
            if (base->flags & URL_FLAGS_HAS_HOST) {
1118
17
              url->flags |= URL_FLAGS_HAS_HOST;
1119
17
              url->host = base->host;
1120
            }
1121
19
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1122
              url->flags |= URL_FLAGS_HAS_QUERY;
1123
              url->query = base->query;
1124
            }
1125
19
            if (base->flags & URL_FLAGS_HAS_PATH) {
1126
19
              url->flags |= URL_FLAGS_HAS_PATH;
1127
19
              url->path = base->path;
1128
            }
1129
19
            url->port = base->port;
1130
19
            state = kFragment;
1131
19
            break;
1132
119
          default:
1133
119
            if (special_back_slash) {
1134
10
              state = kRelativeSlash;
1135
            } else {
1136
109
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1137
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1138
1
                url->username = base->username;
1139
              }
1140
109
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1141
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1142
1
                url->password = base->password;
1143
              }
1144
109
              if (base->flags & URL_FLAGS_HAS_HOST) {
1145
99
                url->flags |= URL_FLAGS_HAS_HOST;
1146
99
                url->host = base->host;
1147
              }
1148
109
              if (base->flags & URL_FLAGS_HAS_PATH) {
1149
109
                url->flags |= URL_FLAGS_HAS_PATH;
1150
109
                url->path = base->path;
1151
109
                ShortenUrlPath(url);
1152
              }
1153
109
              url->port = base->port;
1154
109
              state = kPath;
1155
109
              continue;
1156
            }
1157
        }
1158
299
        break;
1159
247
      case kRelativeSlash:
1160


247
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1161
12
          state = kSpecialAuthorityIgnoreSlashes;
1162
235
        } else if (ch == '/') {
1163
3
          state = kAuthority;
1164
        } else {
1165
232
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1166
4
            url->flags |= URL_FLAGS_HAS_USERNAME;
1167
4
            url->username = base->username;
1168
          }
1169
232
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1170
2
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1171
2
            url->password = base->password;
1172
          }
1173
232
          if (base->flags & URL_FLAGS_HAS_HOST) {
1174
228
            url->flags |= URL_FLAGS_HAS_HOST;
1175
228
            url->host = base->host;
1176
          }
1177
232
          url->port = base->port;
1178
232
          state = kPath;
1179
232
          continue;
1180
        }
1181
15
        break;
1182
3713
      case kSpecialAuthoritySlashes:
1183
3713
        state = kSpecialAuthorityIgnoreSlashes;
1184

3713
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1185
3628
          p++;
1186
        } else {
1187
85
          continue;
1188
        }
1189
3628
        break;
1190
3956
      case kSpecialAuthorityIgnoreSlashes:
1191

3956
        if (ch != '/' && ch != '\\') {
1192
3913
          state = kAuthority;
1193
3913
          continue;
1194
        }
1195
43
        break;
1196
93620
      case kAuthority:
1197
93620
        if (ch == '@') {
1198
437
          if (atflag) {
1199
22
            buffer.reserve(buffer.size() + 3);
1200
22
            buffer.insert(0, "%40");
1201
          }
1202
437
          atflag = true;
1203
437
          size_t blen = buffer.size();
1204

437
          if (blen > 0 && buffer[0] != ':') {
1205
379
            url->flags |= URL_FLAGS_HAS_USERNAME;
1206
          }
1207
5815
          for (size_t n = 0; n < blen; n++) {
1208
5378
            const char bch = buffer[n];
1209
5378
            if (bch == ':') {
1210
368
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1211
368
              if (!password_token_seen_flag) {
1212
358
                password_token_seen_flag = true;
1213
358
                continue;
1214
              }
1215
            }
1216
5020
            if (password_token_seen_flag) {
1217
2447
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1218
            } else {
1219
2573
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1220
            }
1221
          }
1222
437
          buffer.clear();
1223

93183
        } else if (ch == kEOL ||
1224
88948
                   ch == '/' ||
1225
88930
                   ch == '?' ||
1226
88920
                   ch == '#' ||
1227
                   special_back_slash) {
1228

4273
          if (atflag && buffer.size() == 0) {
1229
39
            url->flags |= URL_FLAGS_FAILED;
1230
39
            return;
1231
          }
1232
4234
          p -= buffer.size() + 1;
1233
4234
          buffer.clear();
1234
4234
          state = kHost;
1235
        } else {
1236
88910
          buffer += ch;
1237
        }
1238
93581
        break;
1239
84496
      case kHost:
1240
      case kHostname:
1241

84496
        if (has_state_override && url->scheme == "file:") {
1242
6
          state = kFileHost;
1243
6
          continue;
1244

84490
        } else if (ch == ':' && !square_bracket_flag) {
1245
1793
          if (buffer.size() == 0) {
1246
19
            url->flags |= URL_FLAGS_FAILED;
1247
19
            return;
1248
          }
1249
1774
          if (state_override == kHostname) {
1250
2
            return;
1251
          }
1252
1772
          url->flags |= URL_FLAGS_HAS_HOST;
1253
1772
          if (!ParseHost(buffer, &url->host, special)) {
1254
4
            url->flags |= URL_FLAGS_FAILED;
1255
4
            return;
1256
          }
1257
1768
          buffer.clear();
1258
1768
          state = kPort;
1259

82697
        } else if (ch == kEOL ||
1260
80000
                   ch == '/' ||
1261
79978
                   ch == '?' ||
1262
79964
                   ch == '#' ||
1263
                   special_back_slash) {
1264
2745
          p--;
1265

2745
          if (special && buffer.size() == 0) {
1266
14
            url->flags |= URL_FLAGS_FAILED;
1267
14
            return;
1268
          }
1269
279
          if (has_state_override &&
1270

3029
              buffer.size() == 0 &&
1271
40
              ((url->username.size() > 0 || url->password.size() > 0) ||
1272
19
               url->port != -1)) {
1273
4
            url->flags |= URL_FLAGS_TERMINATED;
1274
4
            return;
1275
          }
1276
2727
          url->flags |= URL_FLAGS_HAS_HOST;
1277
2727
          if (!ParseHost(buffer, &url->host, special)) {
1278
639
            url->flags |= URL_FLAGS_FAILED;
1279
639
            return;
1280
          }
1281
2088
          buffer.clear();
1282
2088
          state = kPathStart;
1283
2088
          if (has_state_override) {
1284
187
            return;
1285
          }
1286
        } else {
1287
79952
          if (ch == '[')
1288
328
            square_bracket_flag = true;
1289
79952
          if (ch == ']')
1290
325
            square_bracket_flag = false;
1291
79952
          buffer += ch;
1292
        }
1293
83621
        break;
1294
9734
      case kPort:
1295
9734
        if (IsASCIIDigit(ch)) {
1296
7913
          buffer += ch;
1297

1821
        } else if (has_state_override ||
1298
1114
                   ch == kEOL ||
1299
27
                   ch == '/' ||
1300
27
                   ch == '?' ||
1301
27
                   ch == '#' ||
1302
                   special_back_slash) {
1303
1794
          if (buffer.size() > 0) {
1304
1785
            unsigned port = 0;
1305
            // the condition port <= 0xffff prevents integer overflow
1306

9536
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1307
7751
              port = port * 10 + buffer[i] - '0';
1308
1785
            if (port > 0xffff) {
1309
              // TODO(TimothyGu): This hack is currently needed for the host
1310
              // setter since it needs access to hostname if it is valid, and
1311
              // if the FAILED flag is set the entire response to JS layer
1312
              // will be empty.
1313
18
              if (state_override == kHost)
1314
1
                url->port = -1;
1315
              else
1316
17
                url->flags |= URL_FLAGS_FAILED;
1317
18
              return;
1318
            }
1319
            // the port is valid
1320
1767
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1321
1767
            if (url->port == -1)
1322
239
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1323
1767
            buffer.clear();
1324
9
          } else if (has_state_override) {
1325
            // TODO(TimothyGu): Similar case as above.
1326
5
            if (state_override == kHost)
1327
1
              url->port = -1;
1328
            else
1329
4
              url->flags |= URL_FLAGS_TERMINATED;
1330
5
            return;
1331
          }
1332
1771
          state = kPathStart;
1333
1771
          continue;
1334
        } else {
1335
27
          url->flags |= URL_FLAGS_FAILED;
1336
27
          return;
1337
        }
1338
7913
        break;
1339
140629
      case kFile:
1340
140629
        url->scheme = "file:";
1341
140629
        url->host.clear();
1342
140629
        url->flags |= URL_FLAGS_HAS_HOST;
1343

140629
        if (ch == '/' || ch == '\\') {
1344
133753
          state = kFileSlash;
1345

6876
        } else if (has_base && base->scheme == "file:") {
1346

6865
          switch (ch) {
1347
2
            case kEOL:
1348
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1349
2
                url->host = base->host;
1350
              }
1351
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1352
2
                url->flags |= URL_FLAGS_HAS_PATH;
1353
2
                url->path = base->path;
1354
              }
1355
2
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1356
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1357
2
                url->query = base->query;
1358
              }
1359
2
              break;
1360
2
            case '?':
1361
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1362
2
                url->host = base->host;
1363
              }
1364
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1365
2
                url->flags |= URL_FLAGS_HAS_PATH;
1366
2
                url->path = base->path;
1367
              }
1368
2
              url->flags |= URL_FLAGS_HAS_QUERY;
1369
2
              url->query.clear();
1370
2
              state = kQuery;
1371
2
              break;
1372
2
            case '#':
1373
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1374
2
                url->host = base->host;
1375
              }
1376
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1377
2
                url->flags |= URL_FLAGS_HAS_PATH;
1378
2
                url->path = base->path;
1379
              }
1380
2
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1381
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1382
2
                url->query = base->query;
1383
              }
1384
2
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1385
2
              url->fragment.clear();
1386
2
              state = kFragment;
1387
2
              break;
1388
6859
            default:
1389
6859
              url->query.clear();
1390
6859
              if (base->flags & URL_FLAGS_HAS_HOST) {
1391
6859
                url->host = base->host;
1392
              }
1393
6859
              if (base->flags & URL_FLAGS_HAS_PATH) {
1394
6859
                url->flags |= URL_FLAGS_HAS_PATH;
1395
6859
                url->path = base->path;
1396
              }
1397
6859
              if (!StartsWithWindowsDriveLetter(p, end)) {
1398
6847
                ShortenUrlPath(url);
1399
              } else {
1400
12
                url->path.clear();
1401
              }
1402
6859
              state = kPath;
1403
6859
              continue;
1404
          }
1405
        } else {
1406
11
          state = kPath;
1407
11
          continue;
1408
        }
1409
133759
        break;
1410
133753
      case kFileSlash:
1411

133753
        if (ch == '/' || ch == '\\') {
1412
133628
          state = kFileHost;
1413
        } else {
1414

125
          if (has_base && base->scheme == "file:") {
1415
115
            url->flags |= URL_FLAGS_HAS_HOST;
1416
115
            url->host = base->host;
1417

223
            if (!StartsWithWindowsDriveLetter(p, end) &&
1418
108
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1419
2
              url->flags |= URL_FLAGS_HAS_PATH;
1420
2
              url->path.push_back(base->path[0]);
1421
            }
1422
          }
1423
125
          state = kPath;
1424
125
          continue;
1425
        }
1426
133628
        break;
1427
134326
      case kFileHost:
1428

134326
        if (ch == kEOL ||
1429
697
            ch == '/' ||
1430
692
            ch == '\\' ||
1431
692
            ch == '?' ||
1432
            ch == '#') {
1433
133628
          if (!has_state_override &&
1434

267262
              buffer.size() == 2 &&
1435
12
              IsWindowsDriveLetter(buffer)) {
1436
6
            state = kPath;
1437
133628
          } else if (buffer.size() == 0) {
1438
133511
            url->flags |= URL_FLAGS_HAS_HOST;
1439
133511
            url->host.clear();
1440
133511
            if (has_state_override)
1441
2
              return;
1442
133509
            state = kPathStart;
1443
          } else {
1444
117
            std::string host;
1445
117
            if (!ParseHost(buffer, &host, special)) {
1446
38
              url->flags |= URL_FLAGS_FAILED;
1447
38
              return;
1448
            }
1449
79
            if (host == "localhost")
1450
24
              host.clear();
1451
79
            url->flags |= URL_FLAGS_HAS_HOST;
1452
79
            url->host = host;
1453
79
            if (has_state_override)
1454
2
              return;
1455
77
            buffer.clear();
1456
77
            state = kPathStart;
1457
          }
1458
133592
          continue;
1459
        } else {
1460
692
          buffer += ch;
1461
        }
1462
692
        break;
1463
220193
      case kPathStart:
1464
220193
        if (IsSpecial(url->scheme)) {
1465
219836
          state = kPath;
1466

219836
          if (ch != '/' && ch != '\\') {
1467
83844
            continue;
1468
          }
1469

357
        } else if (!has_state_override && ch == '?') {
1470
3
          url->flags |= URL_FLAGS_HAS_QUERY;
1471
3
          url->query.clear();
1472
3
          state = kQuery;
1473

354
        } else if (!has_state_override && ch == '#') {
1474
3
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1475
3
          url->fragment.clear();
1476
3
          state = kFragment;
1477
351
        } else if (ch != kEOL) {
1478
306
          state = kPath;
1479
306
          if (ch != '/') {
1480
27
            continue;
1481
          }
1482

45
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1483
1
          url->flags |= URL_FLAGS_HAS_PATH;
1484
1
          url->path.emplace_back("");
1485
        }
1486
136322
        break;
1487
14102979
      case kPath:
1488

14102979
        if (ch == kEOL ||
1489
12755374
            ch == '/' ||
1490
12755339
            special_back_slash ||
1491

12755339
            (!has_state_override && (ch == '?' || ch == '#'))) {
1492
1348594
          if (IsDoubleDotSegment(buffer)) {
1493
4620
            ShortenUrlPath(url);
1494

4620
            if (ch != '/' && !special_back_slash) {
1495
262
              url->flags |= URL_FLAGS_HAS_PATH;
1496
262
              url->path.emplace_back("");
1497
            }
1498
1347141
          } else if (IsSingleDotSegment(buffer) &&
1499

1347141
                     ch != '/' && !special_back_slash) {
1500
727
            url->flags |= URL_FLAGS_HAS_PATH;
1501
727
            url->path.emplace_back("");
1502
1343247
          } else if (!IsSingleDotSegment(buffer)) {
1503
2676069
            if (url->scheme == "file:" &&
1504
1515002
                url->path.empty() &&
1505

2855809
                buffer.size() == 2 &&
1506
52
                IsWindowsDriveLetter(buffer)) {
1507
51
              buffer[1] = ':';
1508
            }
1509
1340807
            url->flags |= URL_FLAGS_HAS_PATH;
1510
1340807
            url->path.emplace_back(std::move(buffer));
1511
          }
1512
1348594
          buffer.clear();
1513
2697188
          if (ch == '?') {
1514
910
            url->flags |= URL_FLAGS_HAS_QUERY;
1515
910
            url->query.clear();
1516
910
            state = kQuery;
1517
1347684
          } else if (ch == '#') {
1518
44
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1519
44
            url->fragment.clear();
1520
44
            state = kFragment;
1521
          }
1522
        } else {
1523
12754385
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1524
        }
1525
14102979
        break;
1526
60794
      case kCannotBeBase:
1527
60794
        switch (ch) {
1528
6
          case '?':
1529
6
            state = kQuery;
1530
6
            break;
1531
5
          case '#':
1532
5
            state = kFragment;
1533
5
            break;
1534
60783
          default:
1535
60783
            if (url->path.empty())
1536
              url->path.emplace_back("");
1537
60783
            else if (ch != kEOL)
1538
55005
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1539
        }
1540
60794
        break;
1541
61284
      case kQuery:
1542

61284
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1543
1096
          url->flags |= URL_FLAGS_HAS_QUERY;
1544
1096
          url->query = std::move(buffer);
1545
1096
          buffer.clear();
1546
1442
          if (ch == '#')
1547
346
            state = kFragment;
1548
        } else {
1549
60188
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1550
                                                QUERY_ENCODE_SET_NONSPECIAL);
1551
        }
1552
61284
        break;
1553
3634
      case kFragment:
1554
3634
        switch (ch) {
1555
492
          case kEOL:
1556
492
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1557
492
            url->fragment = std::move(buffer);
1558
492
            break;
1559
3142
          default:
1560
3142
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1561
        }
1562
3634
        break;
1563
      default:
1564
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1565
        return;
1566
    }
1567
1568
15568915
    p++;
1569
  }
1570
}  // NOLINT(readability/fn_size)
1571
1572
// https://url.spec.whatwg.org/#url-serializing
1573
37363
std::string URL::SerializeURL(const url_data& url,
1574
                              bool exclude = false) {
1575
37363
  std::string output;
1576
37363
  output.reserve(
1577
    10 +  // We generally insert < 10 separator characters between URL parts
1578
37363
    url.scheme.size() +
1579
37363
    url.username.size() +
1580
37363
    url.password.size() +
1581
37363
    url.host.size() +
1582
37363
    url.query.size() +
1583
37363
    url.fragment.size() +
1584
37363
    url.href.size() +
1585
37363
    std::accumulate(
1586
        url.path.begin(),
1587
        url.path.end(),
1588
        0,
1589
392685
        [](size_t sum, const auto& str) { return sum + str.size(); }));
1590
1591
37363
  output += url.scheme;
1592
37363
  if (url.flags & URL_FLAGS_HAS_HOST) {
1593
37363
    output += "//";
1594
37363
    if (url.flags & URL_FLAGS_HAS_USERNAME ||
1595
37363
        url.flags & URL_FLAGS_HAS_PASSWORD) {
1596
      if (url.flags & URL_FLAGS_HAS_USERNAME) {
1597
        output += url.username;
1598
      }
1599
      if (url.flags & URL_FLAGS_HAS_PASSWORD) {
1600
        output += ":" + url.password;
1601
      }
1602
      output += "@";
1603
    }
1604
37363
    output += url.host;
1605
37363
    if (url.port != -1) {
1606
      output += ":" + std::to_string(url.port);
1607
    }
1608
  }
1609
37363
  if (url.flags & URL_FLAGS_CANNOT_BE_BASE) {
1610
    output += url.path[0];
1611
  } else {
1612
    if (!(url.flags & URL_FLAGS_HAS_HOST) &&
1613

37363
          url.path.size() > 1 &&
1614
          url.path[0].empty()) {
1615
      output += "/.";
1616
    }
1617
392685
    for (size_t i = 1; i < url.path.size(); i++) {
1618
355322
      output += "/" + url.path[i];
1619
    }
1620
  }
1621
37363
  if (url.flags & URL_FLAGS_HAS_QUERY) {
1622
    output += "?" + url.query;
1623
  }
1624

37363
  if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) {
1625
    output += "#" + url.fragment;
1626
  }
1627
37363
  output.shrink_to_fit();
1628
37363
  return output;
1629
}
1630
1631
namespace {
1632
159161
void SetArgs(Environment* env,
1633
             Local<Value> argv[ARG_COUNT],
1634
             const struct url_data& url) {
1635
159161
  Isolate* isolate = env->isolate();
1636
159161
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1637
318322
  argv[ARG_PROTOCOL] =
1638
159161
      url.flags & URL_FLAGS_SPECIAL ?
1639
152780
          GetSpecial(env, url.scheme) :
1640
6381
          OneByteString(isolate, url.scheme.c_str());
1641
159161
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1642
1058
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1643
159161
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1644
1038
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1645
159161
  if (url.flags & URL_FLAGS_HAS_HOST)
1646
306482
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1647
159161
  if (url.flags & URL_FLAGS_HAS_QUERY)
1648
2196
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1649
159161
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1650
976
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1651
159161
  if (url.port > -1)
1652
3406
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1653
159161
  if (url.flags & URL_FLAGS_HAS_PATH)
1654
317360
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1655
159161
}
1656
1657
168018
void Parse(Environment* env,
1658
           Local<Value> recv,
1659
           const char* input,
1660
           size_t len,
1661
           enum url_parse_state state_override,
1662
           Local<Value> base_obj,
1663
           Local<Value> context_obj,
1664
           Local<Function> cb,
1665
           Local<Value> error_cb) {
1666
168018
  Isolate* isolate = env->isolate();
1667
168018
  Local<Context> context = env->context();
1668
168018
  HandleScope handle_scope(isolate);
1669
168018
  Context::Scope context_scope(context);
1670
1671
168018
  const bool has_context = context_obj->IsObject();
1672
168018
  const bool has_base = base_obj->IsObject();
1673
1674
168018
  url_data base;
1675
168018
  url_data url;
1676
168018
  if (has_context)
1677
46200
    url = HarvestContext(env, context_obj.As<Object>());
1678
168018
  if (has_base)
1679
8519
    base = HarvestBase(env, base_obj.As<Object>());
1680
1681
168018
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1682

168018
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1683
46200
      ((state_override != kUnknownState) &&
1684
46200
       (url.flags & URL_FLAGS_TERMINATED)))
1685
24
    return;
1686
1687
  // Define the return value placeholders
1688
167994
  const Local<Value> undef = Undefined(isolate);
1689
167994
  const Local<Value> null = Null(isolate);
1690
167994
  if (!(url.flags & URL_FLAGS_FAILED)) {
1691
    Local<Value> argv[] = {
1692
      undef,
1693
      undef,
1694
      undef,
1695
      undef,
1696
      null,  // host defaults to null
1697
      null,  // port defaults to null
1698
      undef,
1699
      null,  // query defaults to null
1700
      null,  // fragment defaults to null
1701
159161
    };
1702
159161
    SetArgs(env, argv, url);
1703
159161
    USE(cb->Call(context, recv, arraysize(argv), argv));
1704
8833
  } else if (error_cb->IsFunction()) {
1705
17448
    Local<Value> flags = Integer::NewFromUnsigned(isolate, url.flags);
1706
8724
    USE(error_cb.As<Function>()->Call(context, recv, 1, &flags));
1707
  }
1708
}
1709
1710
168018
void Parse(const FunctionCallbackInfo<Value>& args) {
1711
168018
  Environment* env = Environment::GetCurrent(args);
1712
168018
  CHECK_GE(args.Length(), 5);
1713
336036
  CHECK(args[0]->IsString());  // input
1714


453993
  CHECK(args[2]->IsUndefined() ||  // base context
1715
        args[2]->IsNull() ||
1716
        args[2]->IsObject());
1717


474636
  CHECK(args[3]->IsUndefined() ||  // context
1718
        args[3]->IsNull() ||
1719
        args[3]->IsObject());
1720
168018
  CHECK(args[4]->IsFunction());  // complete callback
1721

457854
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1722
1723
168018
  Utf8Value input(env->isolate(), args[0]);
1724
168018
  enum url_parse_state state_override = kUnknownState;
1725
168018
  if (args[1]->IsNumber()) {
1726
168018
    state_override = static_cast<enum url_parse_state>(
1727
336036
        args[1]->Uint32Value(env->context()).FromJust());
1728
  }
1729
1730
336036
  Parse(env, args.This(),
1731
168018
        *input, input.length(),
1732
        state_override,
1733
        args[2],
1734
        args[3],
1735
336036
        args[4].As<Function>(),
1736
        args[5]);
1737
168018
}
1738
1739
82
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1740
82
  Environment* env = Environment::GetCurrent(args);
1741
82
  CHECK_GE(args.Length(), 1);
1742
164
  CHECK(args[0]->IsString());
1743
164
  Utf8Value value(env->isolate(), args[0]);
1744
82
  std::string output;
1745
82
  size_t len = value.length();
1746
82
  output.reserve(len);
1747
593
  for (size_t n = 0; n < len; n++) {
1748
511
    const char ch = (*value)[n];
1749
511
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1750
  }
1751
246
  args.GetReturnValue().Set(
1752
164
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1753
82
}
1754
1755
230
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1756
230
  Environment* env = Environment::GetCurrent(args);
1757
230
  CHECK_GE(args.Length(), 1);
1758
460
  CHECK(args[0]->IsString());
1759
230
  Utf8Value value(env->isolate(), args[0]);
1760
1761
230
  URLHost host;
1762
  // Assuming the host is used for a special scheme.
1763
230
  host.ParseHost(*value, value.length(), true);
1764
230
  if (host.ParsingFailed()) {
1765
13
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1766
13
    return;
1767
  }
1768
217
  std::string out = host.ToStringMove();
1769
651
  args.GetReturnValue().Set(
1770
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1771
}
1772
1773
208
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1774
208
  Environment* env = Environment::GetCurrent(args);
1775
208
  CHECK_GE(args.Length(), 1);
1776
416
  CHECK(args[0]->IsString());
1777
208
  Utf8Value value(env->isolate(), args[0]);
1778
1779
208
  URLHost host;
1780
  // Assuming the host is used for a special scheme.
1781
208
  host.ParseHost(*value, value.length(), true, true);
1782
208
  if (host.ParsingFailed()) {
1783
13
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1784
13
    return;
1785
  }
1786
195
  std::string out = host.ToStringMove();
1787
585
  args.GetReturnValue().Set(
1788
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1789
}
1790
1791
1303
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1792
1303
  Environment* env = Environment::GetCurrent(args);
1793
1303
  CHECK_EQ(args.Length(), 1);
1794
1303
  CHECK(args[0]->IsFunction());
1795
2606
  env->set_url_constructor_function(args[0].As<Function>());
1796
1303
}
1797
1798
1303
void Initialize(Local<Object> target,
1799
                Local<Value> unused,
1800
                Local<Context> context,
1801
                void* priv) {
1802
1303
  Environment* env = Environment::GetCurrent(context);
1803
1303
  env->SetMethod(target, "parse", Parse);
1804
1303
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1805
1303
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1806
1303
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1807
1303
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1808
1809
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1810
35181
  FLAGS(XX)
1811
#undef XX
1812
1813
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1814
54726
  PARSESTATES(XX)
1815
#undef XX
1816
1303
}
1817
}  // namespace
1818
1819
5267
// Registers every native callback exposed by this binding with `registry`.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  // Typing the table as v8::FunctionCallback selects the binding overload of
  // Parse.
  const v8::FunctionCallback callbacks[] = {
    Parse,
    EncodeAuthSet,
    DomainToASCII,
    DomainToUnicode,
    SetURLConstructor,
  };
  for (const v8::FunctionCallback callback : callbacks) {
    registry->Register(callback);
  }
}
1826
1827
8
std::string URL::ToFilePath() const {
1828
8
  if (context_.scheme != "file:") {
1829
1
    return "";
1830
  }
1831
1832
#ifdef _WIN32
1833
  const char* slash = "\\";
1834
  auto is_slash = [] (char ch) {
1835
    return ch == '/' || ch == '\\';
1836
  };
1837
#else
1838
7
  const char* slash = "/";
1839
46
  auto is_slash = [] (char ch) {
1840
46
    return ch == '/';
1841
  };
1842

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1843
7
      context_.host.length() > 0) {
1844
1
    return "";
1845
  }
1846
#endif
1847
12
  std::string decoded_path;
1848
18
  for (const std::string& part : context_.path) {
1849
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1850
58
    for (char& ch : decoded) {
1851
46
      if (is_slash(ch)) {
1852
1
        return "";
1853
      }
1854
    }
1855
12
    decoded_path += slash + decoded;
1856
  }
1857
1858
#ifdef _WIN32
1859
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1860
1861
  // If hostname is set, then we have a UNC path. Pass the hostname through
1862
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1863
  // need to worry about percent encoding because the URL parser will have
1864
  // already taken care of that for us. Note that this only causes IDNs with an
1865
  // appropriate `xn--` prefix to be decoded.
1866
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1867
      context_.host.length() > 0) {
1868
    std::string unicode_host;
1869
    if (!ToUnicode(context_.host, &unicode_host)) {
1870
      return "";
1871
    }
1872
    return "\\\\" + unicode_host + decoded_path;
1873
  }
1874
  // Otherwise, it's a local path that requires a drive letter.
1875
  if (decoded_path.length() < 3) {
1876
    return "";
1877
  }
1878
  if (decoded_path[2] != ':' ||
1879
      !IsASCIIAlpha(decoded_path[1])) {
1880
    return "";
1881
  }
1882
  // Strip out the leading '\'.
1883
  return decoded_path.substr(1);
1884
#else
1885
5
  return decoded_path;
1886
#endif
1887
}
1888
1889
37363
// Builds a file: URL from `file_path`.
//
// Each '%' in the path is rewritten to "%25" before parsing. The result is
// then parsed into a URL created from "file://", starting in the kPathStart
// state.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");
  std::string escaped_file_path;
  for (const char ch : file_path) {
    escaped_file_path += ch;
    if (ch == '%') {
      escaped_file_path += "25";
    }
  }
  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1901
1902
// This function works by calling out to a JS function that creates and
1903
// returns the JS URL object. Be mindful of the JS<->Native boundary
1904
// crossing that is required.
1905
MaybeLocal<Value> URL::ToObject(Environment* env) const {
1906
  Isolate* isolate = env->isolate();
1907
  Local<Context> context = env->context();
1908
  Context::Scope context_scope(context);
1909
1910
  const Local<Value> undef = Undefined(isolate);
1911
  const Local<Value> null = Null(isolate);
1912
1913
  if (context_.flags & URL_FLAGS_FAILED)
1914
    return Local<Value>();
1915
1916
  Local<Value> argv[] = {
1917
    undef,
1918
    undef,
1919
    undef,
1920
    undef,
1921
    null,  // host defaults to null
1922
    null,  // port defaults to null
1923
    undef,
1924
    null,  // query defaults to null
1925
    null,  // fragment defaults to null
1926
  };
1927
  SetArgs(env, argv, context_);
1928
1929
  MaybeLocal<Value> ret;
1930
  {
1931
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
1932
1933
    // The SetURLConstructor method must have been called already to
1934
    // set the constructor function used below. SetURLConstructor is
1935
    // called automatically when the internal/url.js module is loaded
1936
    // during the internal/bootstrap/node.js processing.
1937
    ret = env->url_constructor_function()
1938
        ->Call(env->context(), undef, arraysize(argv), argv);
1939
  }
1940
1941
  return ret;
1942
}
1943
1944
}  // namespace url
1945
}  // namespace node
1946
1947
5335
// Hook the `url` binding's initializer and external-reference registration
// function into Node's internal module machinery.
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)