GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1132 1190 95.1 %
Date: 2022-03-11 04:15:03 Branches: 970 1096 88.5 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <numeric>
11
#include <string>
12
#include <vector>
13
14
namespace node {
15
16
using errors::TryCatchScope;
17
18
using url::table_data::hex;
19
using url::table_data::C0_CONTROL_ENCODE_SET;
20
using url::table_data::FRAGMENT_ENCODE_SET;
21
using url::table_data::PATH_ENCODE_SET;
22
using url::table_data::USERINFO_ENCODE_SET;
23
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
24
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
25
26
using v8::Array;
27
using v8::Context;
28
using v8::Function;
29
using v8::FunctionCallbackInfo;
30
using v8::HandleScope;
31
using v8::Int32;
32
using v8::Integer;
33
using v8::Isolate;
34
using v8::Local;
35
using v8::MaybeLocal;
36
using v8::NewStringType;
37
using v8::Null;
38
using v8::Object;
39
using v8::String;
40
using v8::Undefined;
41
using v8::Value;
42
43
141319
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
44
141319
  return String::NewFromUtf8(isolate,
45
                             str.data(),
46
                             NewStringType::kNormal,
47
141319
                             str.length()).ToLocalChecked();
48
}
49
50
namespace url {
51
namespace {
52
53
// https://url.spec.whatwg.org/#eof-code-point
54
constexpr char kEOL = -1;
55
56
// https://url.spec.whatwg.org/#concept-host
57
class URLHost {
58
 public:
59
  ~URLHost();
60
61
  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
62
  void ParseIPv6Host(const char* input, size_t length);
63
  void ParseOpaqueHost(const char* input, size_t length);
64
  void ParseHost(const char* input,
65
                 size_t length,
66
                 bool is_special,
67
                 bool unicode = false);
68
69
5258
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
70
  std::string ToString() const;
71
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
72
  std::string ToStringMove();
73
74
 private:
75
  enum class HostType {
76
    H_FAILED,
77
    H_DOMAIN,
78
    H_IPV4,
79
    H_IPV6,
80
    H_OPAQUE,
81
  };
82
83
  union Value {
84
    std::string domain_or_opaque;
85
    uint32_t ipv4;
86
    uint16_t ipv6[8];
87
88
5258
    ~Value() {}
89
5258
    Value() : ipv4(0) {}
90
  };
91
92
  Value value_;
93
  HostType type_ = HostType::H_FAILED;
94
95
14178
  void Reset() {
96
    using string = std::string;
97
14178
    switch (type_) {
98
4175
      case HostType::H_DOMAIN:
99
      case HostType::H_OPAQUE:
100
4175
        value_.domain_or_opaque.~string();
101
4175
        break;
102
10003
      default:
103
10003
        break;
104
    }
105
14178
    type_ = HostType::H_FAILED;
106
14178
  }
107
108
  // Setting the string members of the union with = is brittle because
109
  // it relies on them being initialized to a state that requires no
110
  // destruction of old data.
111
  // For a long time, that worked well enough because ParseIPv6Host() happens
112
  // to zero-fill `value_`, but that really is relying on standard library
113
  // internals too much.
114
  // These helpers are the easiest solution but we might want to consider
115
  // just not forcing strings into an union.
116
458
  void SetOpaque(std::string&& string) {
117
458
    Reset();
118
458
    type_ = HostType::H_OPAQUE;
119
458
    new(&value_.domain_or_opaque) std::string(std::move(string));
120
458
  }
121
122
3717
  void SetDomain(std::string&& string) {
123
3717
    Reset();
124
3717
    type_ = HostType::H_DOMAIN;
125
3717
    new(&value_.domain_or_opaque) std::string(std::move(string));
126
3717
  }
127
};
128
129
5258
URLHost::~URLHost() {
130
5258
  Reset();
131
5258
}
132
133
#define ARGS(XX)                                                              \
134
  XX(ARG_FLAGS)                                                               \
135
  XX(ARG_PROTOCOL)                                                            \
136
  XX(ARG_USERNAME)                                                            \
137
  XX(ARG_PASSWORD)                                                            \
138
  XX(ARG_HOST)                                                                \
139
  XX(ARG_PORT)                                                                \
140
  XX(ARG_PATH)                                                                \
141
  XX(ARG_QUERY)                                                               \
142
  XX(ARG_FRAGMENT)                                                            \
143
  XX(ARG_COUNT)  // This one has to be last.
144
145
#define ERR_ARGS(XX)                                                          \
146
  XX(ERR_ARG_FLAGS)                                                           \
147
  XX(ERR_ARG_INPUT)                                                           \
148
149
enum url_cb_args {
150
#define XX(name) name,
151
  ARGS(XX)
152
#undef XX
153
};
154
155
enum url_error_cb_args {
156
#define XX(name) name,
157
  ERR_ARGS(XX)
158
#undef XX
159
};
160
161
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
162
  template <typename T>                                                       \
163
  bool name(const T ch1, const T ch2) {                                \
164
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
165
                  "Character must be wider than " #bits " bits");             \
166
    return (expr);                                                            \
167
  }                                                                           \
168
  template <typename T>                                                       \
169
  bool name(const std::basic_string<T>& str) {                         \
170
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
171
                  "Character must be wider than " #bits " bits");             \
172
    return str.length() >= 2 && name(str[0], str[1]);                         \
173
  }
174
175
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
176

13788849
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
177
178
// https://infra.spec.whatwg.org/#c0-control-or-space
179

290398
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
180
181
// https://infra.spec.whatwg.org/#ascii-digit
182

563892
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))
183
184
// https://infra.spec.whatwg.org/#ascii-hex-digit
185


1078
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
186
                               (ch >= 'A' && ch <= 'F') ||
187
                               (ch >= 'a' && ch <= 'f')))
188
189
// https://infra.spec.whatwg.org/#ascii-alpha
190


1248056
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
191
                            (ch >= 'a' && ch <= 'z')))
192
193
// https://infra.spec.whatwg.org/#ascii-alphanumeric
194

548031
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))
195
196
// https://infra.spec.whatwg.org/#ascii-lowercase
197
template <typename T>
198
548103
T ASCIILowercase(T ch) {
199
548103
  return IsASCIIAlpha(ch) ? (ch | 0x20) : ch;
200
}
201
202
// https://url.spec.whatwg.org/#forbidden-host-code-point
203









90746
CHAR_TEST(8, IsForbiddenHostCodePoint,
204
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
205
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
206
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
207
          ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
208
          ch == '^' || ch == '|')
209
210
// https://url.spec.whatwg.org/#windows-drive-letter
211

12412
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
212
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))
213
214
// https://url.spec.whatwg.org/#normalized-windows-drive-letter
215

2668
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
216
                     (IsASCIIAlpha(ch1) && ch2 == ':'))
217
218
#undef TWO_CHAR_STRING_TEST
219
220
11610978
bool BitAt(const uint8_t a[], const uint8_t i) {
221
11610978
  return !!(a[i >> 3] & (1 << (i & 7)));
222
}
223
224
// Appends ch to str. If ch position in encode_set is set, the ch will
225
// be percent-encoded then appended.
226
11610978
void AppendOrEscape(std::string* str,
227
                    const unsigned char ch,
228
                    const uint8_t encode_set[]) {
229
11610978
  if (BitAt(encode_set, ch))
230
1925
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
231
  else
232
11609053
    *str += ch;
233
11610978
}
234
235
template <typename T>
236
850
unsigned hex2bin(const T ch) {
237

850
  if (ch >= '0' && ch <= '9')
238
546
    return ch - '0';
239

304
  if (ch >= 'A' && ch <= 'F')
240
172
    return 10 + (ch - 'A');
241

132
  if (ch >= 'a' && ch <= 'f')
242
132
    return 10 + (ch - 'a');
243
  return static_cast<unsigned>(-1);
244
}
245
246
4386
std::string PercentDecode(const char* input, size_t len) {
247
4386
  std::string dest;
248
4386
  if (len == 0)
249
2
    return dest;
250
4384
  dest.reserve(len);
251
4384
  const char* pointer = input;
252
4384
  const char* end = input + len;
253
254
93680
  while (pointer < end) {
255
89296
    const char ch = pointer[0];
256
89296
    size_t remaining = end - pointer - 1;
257


89733
    if (ch != '%' || remaining < 2 ||
258
437
        (ch == '%' &&
259
437
         (!IsASCIIHexDigit(pointer[1]) ||
260
433
          !IsASCIIHexDigit(pointer[2])))) {
261
88871
      dest += ch;
262
88871
      pointer++;
263
88871
      continue;
264
    } else {
265
425
      unsigned a = hex2bin(pointer[1]);
266
425
      unsigned b = hex2bin(pointer[2]);
267
425
      char c = static_cast<char>(a * 16 + b);
268
425
      dest += c;
269
425
      pointer += 3;
270
    }
271
  }
272
4384
  return dest;
273
}
274
275
#define SPECIALS(XX)                                                          \
276
  XX(ftp, 21, "ftp:")                                                         \
277
  XX(file, -1, "file:")                                                       \
278
  XX(http, 80, "http:")                                                       \
279
  XX(https, 443, "https:")                                                    \
280
  XX(ws, 80, "ws:")                                                           \
281
  XX(wss, 443, "wss:")
282
283
329961
bool IsSpecial(const std::string& scheme) {
284
#define V(_, __, name) if (scheme == name) return true;
285



329961
  SPECIALS(V);
286
#undef V
287
6525
  return false;
288
}
289
290
137636
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
291
#define V(key, _, name) if (scheme == name)                                  \
292
    return env->url_special_##key##_string();
293



137636
  SPECIALS(V)
294
#undef V
295
  UNREACHABLE();
296
}
297
298
131807
int NormalizePort(const std::string& scheme, int p) {
299
#define V(_, port, name) if (scheme == name && p == port) return -1;
300









131807
  SPECIALS(V);
301
#undef V
302
11255
  return p;
303
}
304
305
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
306
6666
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
307
6666
  size_t length = end - p;
308
5962
  return length >= 2 &&
309

12664
    IsWindowsDriveLetter(p[0], p[1]) &&
310
36
    (length == 2 ||
311
36
      p[2] == '/' ||
312
14
      p[2] == '\\' ||
313
6
      p[2] == '?' ||
314
6670
      p[2] == '#');
315
}
316
317
#if defined(NODE_HAVE_I18N_SUPPORT)
318
195
bool ToUnicode(const std::string& input, std::string* output) {
319
390
  MaybeStackBuffer<char> buf;
320
195
  if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0)
321
    return false;
322
195
  output->assign(*buf, buf.length());
323
195
  return true;
324
}
325
326
4373
bool ToASCII(const std::string& input, std::string* output) {
327
8746
  MaybeStackBuffer<char> buf;
328
4373
  if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0)
329
124
    return false;
330
4249
  if (buf.length() == 0)
331
24
    return false;
332
4225
  output->assign(*buf, buf.length());
333
4225
  return true;
334
}
335
#else  // !defined(NODE_HAVE_I18N_SUPPORT)
336
// Intentional non-ops if ICU is not present.
337
bool ToUnicode(const std::string& input, std::string* output) {
338
  *output = input;
339
  return true;
340
}
341
342
bool ToASCII(const std::string& input, std::string* output) {
343
  *output = input;
344
  return true;
345
}
346
#endif  // !defined(NODE_HAVE_I18N_SUPPORT)
347
348
#define NS_IN6ADDRSZ 16
349
350
357
void URLHost::ParseIPv6Host(const char* input, size_t length) {
351
357
  CHECK_EQ(type_, HostType::H_FAILED);
352
353
  unsigned char buf[sizeof(struct in6_addr)];
354
357
  MaybeStackBuffer<char> ipv6(length + 1);
355
357
  *(*ipv6 + length) = 0;
356
357
  memset(buf, 0, sizeof(buf));
357
357
  memcpy(*ipv6, input, sizeof(const char) * length);
358
359
357
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
360
361
357
  if (ret != 0) {
362
92
    return;
363
  }
364
365
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
366
2385
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
367
2120
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
368
  }
369
370
265
  type_ = HostType::H_IPV6;
371
}
372
373
4923
int64_t ParseNumber(const char* start, const char* end) {
374
4923
  unsigned R = 10;
375

4923
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
376
48
    start += 2;
377
48
    R = 16;
378
  }
379
4923
  if (end - start == 0) {
380
8
    return 0;
381

4915
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
382
55
    start++;
383
55
    R = 8;
384
  }
385
4915
  const char* p = start;
386
387
7430
  while (p < end) {
388
6216
    const char ch = p[0];
389

6216
    switch (R) {
390
274
      case 8:
391

274
        if (ch < '0' || ch > '7')
392
29
          return -1;
393
245
        break;
394
5734
      case 10:
395
5734
        if (!IsASCIIDigit(ch))
396
3668
          return -1;
397
2066
        break;
398
208
      case 16:
399
208
        if (!IsASCIIHexDigit(ch))
400
4
          return -1;
401
204
        break;
402
    }
403
2515
    p++;
404
  }
405
1214
  return strtoll(start, nullptr, R);
406
}
407
408
4050
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
409
4050
  CHECK_EQ(type_, HostType::H_FAILED);
410
4050
  *is_ipv4 = false;
411
4050
  const char* pointer = input;
412
4050
  const char* mark = input;
413
4050
  const char* end = pointer + length;
414
4050
  int parts = 0;
415
4050
  uint32_t val = 0;
416
  uint64_t numbers[4];
417
4050
  int tooBigNumbers = 0;
418
4050
  if (length == 0)
419
3745
    return;
420
421
37774
  while (pointer <= end) {
422
37445
    const char ch = pointer < end ? pointer[0] : kEOL;
423
37445
    int64_t remaining = end - pointer - 1;
424

37445
    if (ch == '.' || ch == kEOL) {
425
4939
      if (++parts > static_cast<int>(arraysize(numbers)))
426
4
        return;
427
4935
      if (pointer == mark)
428
12
        return;
429
4923
      int64_t n = ParseNumber(mark, pointer);
430
4923
      if (n < 0)
431
3701
        return;
432
433
1222
      if (n > 255) {
434
112
        tooBigNumbers++;
435
      }
436
1222
      numbers[parts - 1] = n;
437
1222
      mark = pointer + 1;
438

1222
      if (ch == '.' && remaining == 0)
439
4
        break;
440
    }
441
33724
    pointer++;
442
  }
443
333
  CHECK_GT(parts, 0);
444
333
  *is_ipv4 = true;
445
446
  // If any but the last item in numbers is greater than 255, return failure.
447
  // If the last item in numbers is greater than or equal to
448
  // 256^(5 - the number of items in numbers), return failure.
449
329
  if (tooBigNumbers > 1 ||
450

722
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
451
325
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
452
28
    return;
453
  }
454
455
305
  type_ = HostType::H_IPV4;
456
305
  val = static_cast<uint32_t>(numbers[parts - 1]);
457
1096
  for (int n = 0; n < parts - 1; n++) {
458
791
    double b = 3 - n;
459
791
    val +=
460
791
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
461
  }
462
463
305
  value_.ipv4 = val;
464
}
465
466
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
467
520
  CHECK_EQ(type_, HostType::H_FAILED);
468
520
  std::string output;
469
520
  output.reserve(length);
470
3053
  for (size_t i = 0; i < length; i++) {
471
2595
    const char ch = input[i];
472

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
473
62
      return;
474
    } else {
475
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
476
    }
477
  }
478
479
458
  SetOpaque(std::move(output));
480
}
481
482
5258
void URLHost::ParseHost(const char* input,
483
                        size_t length,
484
                        bool is_special,
485
                        bool unicode) {
486
5258
  CHECK_EQ(type_, HostType::H_FAILED);
487
5258
  const char* pointer = input;
488
489
5258
  if (length == 0)
490
1541
    return;
491
492
5258
  if (pointer[0] == '[') {
493
365
    if (pointer[length - 1] != ']')
494
8
      return;
495
357
    return ParseIPv6Host(++pointer, length - 2);
496
  }
497
498
4893
  if (!is_special)
499
520
    return ParseOpaqueHost(input, length);
500
501
  // First, we have to percent decode
502
4373
  std::string decoded = PercentDecode(input, length);
503
504
  // Then we have to punycode toASCII
505
4373
  if (!ToASCII(decoded, &decoded))
506
148
    return;
507
508
  // If any of the following characters are still present, we have to fail
509
92227
  for (size_t n = 0; n < decoded.size(); n++) {
510
88177
    const char ch = decoded[n];
511
88177
    if (IsForbiddenHostCodePoint(ch)) {
512
175
      return;
513
    }
514
  }
515
516
  // Check to see if it's an IPv4 IP address
517
  bool is_ipv4;
518
4050
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
519
4050
  if (is_ipv4)
520
333
    return;
521
522
  // If the unicode flag is set, run the result through punycode ToUnicode
523

3717
  if (unicode && !ToUnicode(decoded, &decoded))
524
    return;
525
526
  // It's not an IPv4 or IPv6 address, it must be a domain
527
3717
  SetDomain(std::move(decoded));
528
}
529
530
// Locates the longest sequence of 0 segments in an IPv6 address
531
// in order to use the :: compression when serializing
532
template <typename T>
533
265
T* FindLongestZeroSequence(T* values, size_t len) {
534
265
  T* start = values;
535
265
  T* end = start + len;
536
265
  T* result = nullptr;
537
538
265
  T* current = nullptr;
539
265
  unsigned counter = 0, longest = 1;
540
541
2385
  while (start < end) {
542
2120
    if (*start == 0) {
543
1805
      if (current == nullptr)
544
279
        current = start;
545
1805
      counter++;
546
    } else {
547
315
      if (counter > longest) {
548
257
        longest = counter;
549
257
        result = current;
550
      }
551
315
      counter = 0;
552
315
      current = nullptr;
553
    }
554
2120
    start++;
555
  }
556
265
  if (counter > longest)
557
6
    result = current;
558
265
  return result;
559
}
560
561
4745
std::string URLHost::ToStringMove() {
562
4745
  std::string return_value;
563
4745
  switch (type_) {
564
4175
    case HostType::H_DOMAIN:
565
    case HostType::H_OPAQUE:
566
4175
      return_value = std::move(value_.domain_or_opaque);
567
4175
      break;
568
570
    default:
569
570
      return_value = ToString();
570
570
      break;
571
  }
572
4745
  Reset();
573
4745
  return return_value;
574
}
575
576
570
std::string URLHost::ToString() const {
577
1140
  std::string dest;
578

570
  switch (type_) {
579
    case HostType::H_DOMAIN:
580
    case HostType::H_OPAQUE:
581
      return value_.domain_or_opaque;
582
305
    case HostType::H_IPV4: {
583
305
      dest.reserve(15);
584
305
      uint32_t value = value_.ipv4;
585
1525
      for (int n = 0; n < 4; n++) {
586
1220
        dest.insert(0, std::to_string(value % 256));
587
1220
        if (n < 3)
588
915
          dest.insert(0, 1, '.');
589
1220
        value /= 256;
590
      }
591
305
      break;
592
    }
593
265
    case HostType::H_IPV6: {
594
265
      dest.reserve(41);
595
265
      dest += '[';
596
265
      const uint16_t* start = &value_.ipv6[0];
597
      const uint16_t* compress_pointer =
598
265
          FindLongestZeroSequence(start, 8);
599
265
      bool ignore0 = false;
600
2385
      for (int n = 0; n <= 7; n++) {
601
2120
        const uint16_t* piece = &value_.ipv6[n];
602

2120
        if (ignore0 && *piece == 0)
603
1785
          continue;
604
596
        else if (ignore0)
605
255
          ignore0 = false;
606
596
        if (compress_pointer == piece) {
607
261
          dest += n == 0 ? "::" : ":";
608
261
          ignore0 = true;
609
261
          continue;
610
        }
611
        char buf[5];
612
335
        snprintf(buf, sizeof(buf), "%x", *piece);
613
335
        dest += buf;
614
335
        if (n < 7)
615
76
          dest += ':';
616
      }
617
265
      dest += ']';
618
265
      break;
619
    }
620
    case HostType::H_FAILED:
621
      break;
622
  }
623
570
  return dest;
624
}
625
626
4916
bool ParseHost(const std::string& input,
627
               std::string* output,
628
               bool is_special,
629
               bool unicode = false) {
630
4916
  if (input.empty()) {
631
94
    output->clear();
632
94
    return true;
633
  }
634
9644
  URLHost host;
635
4822
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
636
4822
  if (host.ParsingFailed())
637
489
    return false;
638
4333
  *output = host.ToStringMove();
639
4333
  return true;
640
}
641
642
8808
std::vector<std::string> FromJSStringArray(Environment* env,
643
                                           Local<Array> array) {
644
8808
  std::vector<std::string> vec;
645
8808
  if (array->Length() > 0)
646
8792
    vec.reserve(array->Length());
647
133684
  for (size_t n = 0; n < array->Length(); n++) {
648
116068
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
649
116068
    if (val->IsString()) {
650
58034
      Utf8Value value(env->isolate(), val.As<String>());
651
58034
      vec.emplace_back(*value, value.length());
652
    }
653
  }
654
8808
  return vec;
655
}
656
657
8808
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
658
8808
  url_data base;
659
8808
  Local<Context> context = env->context();
660
661
  Local<Value> flags =
662
26424
      base_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
663
8808
  if (flags->IsInt32())
664
17616
    base.flags = flags->Int32Value(context).FromJust();
665
666
  Local<Value> port =
667
26424
      base_obj->Get(env->context(), env->port_string()).ToLocalChecked();
668
8808
  if (port->IsInt32())
669
76
    base.port = port->Int32Value(context).FromJust();
670
671
  Local<Value> scheme =
672
17616
      base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
673
8808
  base.scheme = Utf8Value(env->isolate(), scheme).out();
674
675
  auto GetStr = [&](std::string url_data::*member,
676
                    int flag,
677
                    Local<String> name,
678
44040
                    bool empty_as_present) {
679
88080
    Local<Value> value = base_obj->Get(env->context(), name).ToLocalChecked();
680
88080
    if (value->IsString()) {
681
50608
      Utf8Value utf8value(env->isolate(), value.As<String>());
682
25304
      (base.*member).assign(*utf8value, utf8value.length());
683

42920
      if (empty_as_present || value.As<String>()->Length() != 0) {
684
7710
        base.flags |= flag;
685
      }
686
    }
687
52848
  };
688
8808
  GetStr(&url_data::username,
689
         URL_FLAGS_HAS_USERNAME,
690
         env->username_string(),
691
         false);
692
8808
  GetStr(&url_data::password,
693
         URL_FLAGS_HAS_PASSWORD,
694
         env->password_string(),
695
         false);
696
8808
  GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
697
8808
  GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
698
8808
  GetStr(&url_data::fragment,
699
         URL_FLAGS_HAS_FRAGMENT,
700
         env->fragment_string(),
701
         true);
702
703
  Local<Value>
704
26424
      path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked();
705
8808
  if (path->IsArray()) {
706
8808
    base.flags |= URL_FLAGS_HAS_PATH;
707
8808
    base.path = FromJSStringArray(env, path.As<Array>());
708
  }
709
8808
  return base;
710
}
711
712
41213
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
713
41213
  url_data context;
714
  Local<Value> flags =
715
123639
      context_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
716
41213
  if (flags->IsInt32()) {
717
    static constexpr int32_t kCopyFlagsMask =
718
        URL_FLAGS_SPECIAL |
719
        URL_FLAGS_CANNOT_BE_BASE |
720
        URL_FLAGS_HAS_USERNAME |
721
        URL_FLAGS_HAS_PASSWORD |
722
        URL_FLAGS_HAS_HOST;
723
41213
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
724
  }
725
  Local<Value> scheme =
726
123639
      context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
727
82426
  if (scheme->IsString()) {
728
82426
    Utf8Value value(env->isolate(), scheme);
729
41213
    context.scheme.assign(*value, value.length());
730
  }
731
  Local<Value> port =
732
123639
      context_obj->Get(env->context(), env->port_string()).ToLocalChecked();
733
41213
  if (port->IsInt32())
734
243
    context.port = port.As<Int32>()->Value();
735
41213
  if (context.flags & URL_FLAGS_HAS_USERNAME) {
736
    Local<Value> username =
737
221
        context_obj->Get(env->context(),
738
663
                         env->username_string()).ToLocalChecked();
739
442
    CHECK(username->IsString());
740
442
    Utf8Value value(env->isolate(), username);
741
221
    context.username.assign(*value, value.length());
742
  }
743
41213
  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
744
    Local<Value> password =
745
209
        context_obj->Get(env->context(),
746
627
                         env->password_string()).ToLocalChecked();
747
418
    CHECK(password->IsString());
748
418
    Utf8Value value(env->isolate(), password);
749
209
    context.password.assign(*value, value.length());
750
  }
751
  Local<Value> host =
752
41213
      context_obj->Get(env->context(),
753
123639
                       env->host_string()).ToLocalChecked();
754
82426
  if (host->IsString()) {
755
82350
    Utf8Value value(env->isolate(), host);
756
41175
    context.host.assign(*value, value.length());
757
  }
758
41213
  return context;
759
}
760
761
// Single dot segment can be ".", "%2e", or "%2E"
762
2440030
bool IsSingleDotSegment(const std::string& str) {
763
2440030
  switch (str.size()) {
764
6958
    case 1:
765
6958
      return str == ".";
766
146194
    case 3:
767
146194
      return str[0] == '%' &&
768

146240
             str[1] == '2' &&
769
146240
             ASCIILowercase(str[2]) == 'e';
770
2286878
    default:
771
2286878
      return false;
772
  }
773
}
774
775
// Double dot segment can be:
776
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
777
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
778
1224794
bool IsDoubleDotSegment(const std::string& str) {
779

1224794
  switch (str.size()) {
780
4967
    case 2:
781
4967
      return str == "..";
782
345451
    case 4:
783

345451
      if (str[0] != '.' && str[0] != '%')
784
345432
        return false;
785
19
      return ((str[0] == '.' &&
786
13
               str[1] == '%' &&
787

8
               str[2] == '2' &&
788
42
               ASCIILowercase(str[3]) == 'e') ||
789
15
              (str[0] == '%' &&
790

12
               str[1] == '2' &&
791
6
               ASCIILowercase(str[2]) == 'e' &&
792
25
               str[3] == '.'));
793
69025
    case 6:
794
69025
      return (str[0] == '%' &&
795

24
              str[1] == '2' &&
796
12
              ASCIILowercase(str[2]) == 'e' &&
797
4
              str[3] == '%' &&
798

69041
              str[4] == '2' &&
799
69029
              ASCIILowercase(str[5]) == 'e');
800
805351
    default:
801
805351
      return false;
802
  }
803
}
804
805
11108
void ShortenUrlPath(struct url_data* url) {
806
11108
  if (url->path.empty()) return;
807


11299
  if (url->path.size() == 1 && url->scheme == "file:" &&
808
584
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
809
10715
  url->path.pop_back();
810
}
811
812
}  // anonymous namespace
813
814
220521
void URL::Parse(const char* input,
815
                size_t len,
816
                enum url_parse_state state_override,
817
                struct url_data* url,
818
                bool has_url,
819
                const struct url_data* base,
820
                bool has_base) {
821
220521
  const char* p = input;
822
220521
  const char* end = input + len;
823
824
220521
  if (!has_url) {
825
145222
    for (const char* ptr = p; ptr < end; ptr++) {
826
145203
      if (IsC0ControlOrSpace(*ptr))
827
56
        p++;
828
      else
829
145147
        break;
830
    }
831
145214
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
832
145195
      if (IsC0ControlOrSpace(*ptr))
833
48
        end--;
834
      else
835
145147
        break;
836
    }
837
145166
    input = p;
838
145166
    len = end - p;
839
  }
840
841
  // The spec says we should strip out any ASCII tabs or newlines.
842
  // In those cases, we create another std::string instance with the filtered
843
  // contents, but in the general case we avoid the overhead.
844
220521
  std::string whitespace_stripped;
845
14008347
  for (const char* ptr = p; ptr < end; ptr++) {
846
13787996
    if (!IsASCIITabOrNewline(*ptr))
847
13787826
      continue;
848
    // Hit tab or newline. Allocate storage, copy what we have until now,
849
    // and then iterate and filter all similar characters out.
850
170
    whitespace_stripped.reserve(len - 1);
851
170
    whitespace_stripped.assign(p, ptr - p);
852
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
853
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
854
853
      if (!IsASCIITabOrNewline(*ptr))
855
769
        whitespace_stripped += *ptr;
856
    }
857
858
    // Update variables like they should have looked like if the string
859
    // had been stripped of whitespace to begin with.
860
170
    input = whitespace_stripped.c_str();
861
170
    len = whitespace_stripped.size();
862
170
    p = input;
863
170
    end = input + len;
864
170
    break;
865
  }
866
867
220521
  bool atflag = false;  // Set when @ has been seen.
868
220521
  bool square_bracket_flag = false;  // Set inside of [...]
869
220521
  bool password_token_seen_flag = false;  // Set after a : after an username.
870
871
220521
  std::string buffer;
872
873
  // Set the initial parse state.
874
220521
  const bool has_state_override = state_override != kUnknownState;
875
220521
  enum url_parse_state state = has_state_override ? state_override :
876
                                                    kSchemeStart;
877
878

220521
  if (state < kSchemeStart || state > kFragment) {
879
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
880
    return;
881
  }
882
883
14519760
  while (p <= end) {
884
14308047
    const char ch = p < end ? p[0] : kEOL;
885
14308047
    bool special = (url->flags & URL_FLAGS_SPECIAL);
886
    bool cannot_be_base;
887

14308047
    bool special_back_slash = (special && ch == '\\');
888
889





14308047
    switch (state) {
890
145249
      case kSchemeStart:
891
145249
        if (IsASCIIAlpha(ch)) {
892
132292
          buffer += ASCIILowercase(ch);
893
132292
          state = kScheme;
894
12957
        } else if (!has_state_override) {
895
12947
          state = kNoScheme;
896
12947
          continue;
897
        } else {
898
10
          url->flags |= URL_FLAGS_FAILED;
899
10
          return;
900
        }
901
132292
        break;
902
548031
      case kScheme:
903


548031
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
904
415739
          buffer += ASCIILowercase(ch);
905

132292
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
906

130279
          if (has_state_override && buffer.size() == 0) {
907
            url->flags |= URL_FLAGS_TERMINATED;
908
            return;
909
          }
910
130279
          buffer += ':';
911
912
130279
          bool new_is_special = IsSpecial(buffer);
913
914
130279
          if (has_state_override) {
915
45
            if ((special != new_is_special) ||
916
45
                ((buffer == "file:") &&
917
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
918
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
919


116
                  (url->port != -1))) ||
920
45
                  (url->scheme == "file:" && url->host.empty())) {
921
32
              url->flags |= URL_FLAGS_TERMINATED;
922
32
              return;
923
            }
924
          }
925
926
130247
          url->scheme = std::move(buffer);
927
130247
          url->port = NormalizePort(url->scheme, url->port);
928
130247
          if (new_is_special) {
929
124428
            url->flags |= URL_FLAGS_SPECIAL;
930
124428
            special = true;
931
          } else {
932
5819
            url->flags &= ~URL_FLAGS_SPECIAL;
933
5819
            special = false;
934
          }
935
          // `special_back_slash` equals to `(special && ch == '\\')` and `ch`
936
          // here always not equals to `\\`. So `special_back_slash` here always
937
          // equals to `false`.
938
130247
          special_back_slash = false;
939
130247
          buffer.clear();
940
130247
          if (has_state_override)
941
33
            return;
942
130214
          if (url->scheme == "file:") {
943
120503
            state = kFile;
944
3908
          } else if (special &&
945

13619
                     has_base &&
946
1039
                     url->scheme == base->scheme) {
947
329
            state = kSpecialRelativeOrAuthority;
948
9382
          } else if (special) {
949
3579
            state = kSpecialAuthoritySlashes;
950

5803
          } else if (p + 1 < end && p[1] == '/') {
951
716
            state = kPathOrAuthority;
952
716
            p++;
953
          } else {
954
5087
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
955
5087
            url->flags |= URL_FLAGS_HAS_PATH;
956
5087
            url->path.emplace_back("");
957
5087
            state = kCannotBeBase;
958
130214
          }
959
2013
        } else if (!has_state_override) {
960
2005
          buffer.clear();
961
2005
          state = kNoScheme;
962
2005
          p = input;
963
2005
          continue;
964
        } else {
965
8
          url->flags |= URL_FLAGS_FAILED;
966
8
          return;
967
        }
968
545953
        break;
969
14952
      case kNoScheme:
970

14952
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
971

14952
        if (!has_base || (cannot_be_base && ch != '#')) {
972
7824
          url->flags |= URL_FLAGS_FAILED;
973
7824
          return;
974

7128
        } else if (cannot_be_base && ch == '#') {
975
28
          url->scheme = base->scheme;
976
28
          if (IsSpecial(url->scheme)) {
977
            url->flags |= URL_FLAGS_SPECIAL;
978
            special = true;
979
          } else {
980
28
            url->flags &= ~URL_FLAGS_SPECIAL;
981
28
            special = false;
982
          }
983

28
          special_back_slash = (special && ch == '\\');
984
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
985
28
            url->flags |= URL_FLAGS_HAS_PATH;
986
28
            url->path = base->path;
987
          }
988
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
989
4
            url->flags |= URL_FLAGS_HAS_QUERY;
990
4
            url->query = base->query;
991
          }
992
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
993
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
994
            url->fragment = base->fragment;
995
          }
996
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
997
28
          state = kFragment;
998

14200
        } else if (has_base &&
999
7100
                   base->scheme != "file:") {
1000
413
          state = kRelative;
1001
413
          continue;
1002
        } else {
1003
6687
          url->scheme = "file:";
1004
6687
          url->flags |= URL_FLAGS_SPECIAL;
1005
6687
          special = true;
1006
6687
          state = kFile;
1007

6687
          special_back_slash = (special && ch == '\\');
1008
6687
          continue;
1009
        }
1010
28
        break;
1011
329
      case kSpecialRelativeOrAuthority:
1012

329
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1013
297
          state = kSpecialAuthorityIgnoreSlashes;
1014
297
          p++;
1015
        } else {
1016
32
          state = kRelative;
1017
32
          continue;
1018
        }
1019
297
        break;
1020
716
      case kPathOrAuthority:
1021
716
        if (ch == '/') {
1022
548
          state = kAuthority;
1023
        } else {
1024
168
          state = kPath;
1025
168
          continue;
1026
        }
1027
548
        break;
1028
445
      case kRelative:
1029
445
        url->scheme = base->scheme;
1030
445
        if (IsSpecial(url->scheme)) {
1031
345
          url->flags |= URL_FLAGS_SPECIAL;
1032
345
          special = true;
1033
        } else {
1034
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1035
100
          special = false;
1036
        }
1037

445
        special_back_slash = (special && ch == '\\');
1038

445
        switch (ch) {
1039
18
          case kEOL:
1040
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1041
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1042
4
              url->username = base->username;
1043
            }
1044
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1045
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1046
4
              url->password = base->password;
1047
            }
1048
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1049
16
              url->flags |= URL_FLAGS_HAS_HOST;
1050
16
              url->host = base->host;
1051
            }
1052
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1053
              url->flags |= URL_FLAGS_HAS_QUERY;
1054
              url->query = base->query;
1055
            }
1056
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1057
18
              url->flags |= URL_FLAGS_HAS_PATH;
1058
18
              url->path = base->path;
1059
            }
1060
18
            url->port = base->port;
1061
18
            break;
1062
154
          case '/':
1063
154
            state = kRelativeSlash;
1064
154
            break;
1065
38
          case '?':
1066
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1067
              url->flags |= URL_FLAGS_HAS_USERNAME;
1068
              url->username = base->username;
1069
            }
1070
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1071
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1072
              url->password = base->password;
1073
            }
1074
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1075
34
              url->flags |= URL_FLAGS_HAS_HOST;
1076
34
              url->host = base->host;
1077
            }
1078
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1079
38
              url->flags |= URL_FLAGS_HAS_PATH;
1080
38
              url->path = base->path;
1081
            }
1082
38
            url->port = base->port;
1083
38
            state = kQuery;
1084
38
            break;
1085
38
          case '#':
1086
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1087
              url->flags |= URL_FLAGS_HAS_USERNAME;
1088
              url->username = base->username;
1089
            }
1090
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1091
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1092
              url->password = base->password;
1093
            }
1094
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1095
34
              url->flags |= URL_FLAGS_HAS_HOST;
1096
34
              url->host = base->host;
1097
            }
1098
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1099
              url->flags |= URL_FLAGS_HAS_QUERY;
1100
              url->query = base->query;
1101
            }
1102
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1103
38
              url->flags |= URL_FLAGS_HAS_PATH;
1104
38
              url->path = base->path;
1105
            }
1106
38
            url->port = base->port;
1107
38
            state = kFragment;
1108
38
            break;
1109
197
          default:
1110
197
            if (special_back_slash) {
1111
18
              state = kRelativeSlash;
1112
            } else {
1113
179
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1114
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1115
1
                url->username = base->username;
1116
              }
1117
179
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1118
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1119
1
                url->password = base->password;
1120
              }
1121
179
              if (base->flags & URL_FLAGS_HAS_HOST) {
1122
159
                url->flags |= URL_FLAGS_HAS_HOST;
1123
159
                url->host = base->host;
1124
              }
1125
179
              if (base->flags & URL_FLAGS_HAS_PATH) {
1126
179
                url->flags |= URL_FLAGS_HAS_PATH;
1127
179
                url->path = base->path;
1128
179
                ShortenUrlPath(url);
1129
              }
1130
179
              url->port = base->port;
1131
179
              state = kPath;
1132
179
              continue;
1133
            }
1134
        }
1135
266
        break;
1136
172
      case kRelativeSlash:
1137


172
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1138
22
          state = kSpecialAuthorityIgnoreSlashes;
1139
150
        } else if (ch == '/') {
1140
6
          state = kAuthority;
1141
        } else {
1142
144
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1143
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1144
8
            url->username = base->username;
1145
          }
1146
144
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1147
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1148
4
            url->password = base->password;
1149
          }
1150
144
          if (base->flags & URL_FLAGS_HAS_HOST) {
1151
136
            url->flags |= URL_FLAGS_HAS_HOST;
1152
136
            url->host = base->host;
1153
          }
1154
144
          url->port = base->port;
1155
144
          state = kPath;
1156
144
          continue;
1157
        }
1158
28
        break;
1159
3579
      case kSpecialAuthoritySlashes:
1160
3579
        state = kSpecialAuthorityIgnoreSlashes;
1161

3579
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1162
3430
          p++;
1163
        } else {
1164
149
          continue;
1165
        }
1166
3430
        break;
1167
3975
      case kSpecialAuthorityIgnoreSlashes:
1168

3975
        if (ch != '/' && ch != '\\') {
1169
3898
          state = kAuthority;
1170
3898
          continue;
1171
        }
1172
77
        break;
1173
93977
      case kAuthority:
1174
93977
        if (ch == '@') {
1175
565
          if (atflag) {
1176
41
            buffer.reserve(buffer.size() + 3);
1177
41
            buffer.insert(0, "%40");
1178
          }
1179
565
          atflag = true;
1180
565
          size_t blen = buffer.size();
1181

565
          if (blen > 0 && buffer[0] != ':') {
1182
469
            url->flags |= URL_FLAGS_HAS_USERNAME;
1183
          }
1184
6652
          for (size_t n = 0; n < blen; n++) {
1185
6087
            const char bch = buffer[n];
1186
6087
            if (bch == ':') {
1187
444
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1188
444
              if (!password_token_seen_flag) {
1189
428
                password_token_seen_flag = true;
1190
428
                continue;
1191
              }
1192
            }
1193
5659
            if (password_token_seen_flag) {
1194
2722
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1195
            } else {
1196
2937
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1197
            }
1198
          }
1199
565
          buffer.clear();
1200

93412
        } else if (ch == kEOL ||
1201
89030
                   ch == '/' ||
1202
88998
                   ch == '?' ||
1203
88980
                   ch == '#' ||
1204
                   special_back_slash) {
1205

4452
          if (atflag && buffer.size() == 0) {
1206
52
            url->flags |= URL_FLAGS_FAILED;
1207
52
            return;
1208
          }
1209
4400
          p -= buffer.size() + 1;
1210
4400
          buffer.clear();
1211
4400
          state = kHost;
1212
        } else {
1213
88960
          buffer += ch;
1214
        }
1215
93925
        break;
1216
85362
      case kHost:
1217
      case kHostname:
1218

85362
        if (has_state_override && url->scheme == "file:") {
1219
12
          state = kFileHost;
1220
12
          continue;
1221

85350
        } else if (ch == ':' && !square_bracket_flag) {
1222
1600
          if (buffer.size() == 0) {
1223
24
            url->flags |= URL_FLAGS_FAILED;
1224
24
            return;
1225
          }
1226
1576
          if (state_override == kHostname) {
1227
4
            return;
1228
          }
1229
1572
          url->flags |= URL_FLAGS_HAS_HOST;
1230
1572
          if (!ParseHost(buffer, &url->host, special)) {
1231
5
            url->flags |= URL_FLAGS_FAILED;
1232
5
            return;
1233
          }
1234
1567
          buffer.clear();
1235
1567
          state = kPort;
1236

83750
        } else if (ch == kEOL ||
1237
80660
                   ch == '/' ||
1238
80620
                   ch == '?' ||
1239
80594
                   ch == '#' ||
1240
                   special_back_slash) {
1241
3180
          p--;
1242

3180
          if (special && buffer.size() == 0) {
1243
21
            url->flags |= URL_FLAGS_FAILED;
1244
21
            return;
1245
          }
1246
331
          if (has_state_override &&
1247

3528
              buffer.size() == 0 &&
1248
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1249
38
               url->port != -1)) {
1250
8
            url->flags |= URL_FLAGS_TERMINATED;
1251
8
            return;
1252
          }
1253
3151
          url->flags |= URL_FLAGS_HAS_HOST;
1254
3151
          if (!ParseHost(buffer, &url->host, special)) {
1255
432
            url->flags |= URL_FLAGS_FAILED;
1256
432
            return;
1257
          }
1258
2719
          buffer.clear();
1259
2719
          state = kPathStart;
1260
2719
          if (has_state_override) {
1261
227
            return;
1262
          }
1263
        } else {
1264
80570
          if (ch == '[')
1265
359
            square_bracket_flag = true;
1266
80570
          if (ch == ']')
1267
355
            square_bracket_flag = false;
1268
80570
          buffer += ch;
1269
        }
1270
84629
        break;
1271
9049
      case kPort:
1272
9049
        if (IsASCIIDigit(ch)) {
1273
7413
          buffer += ch;
1274

1636
        } else if (has_state_override ||
1275
1118
                   ch == kEOL ||
1276
36
                   ch == '/' ||
1277
36
                   ch == '?' ||
1278
36
                   ch == '#' ||
1279
                   special_back_slash) {
1280
1600
          if (buffer.size() > 0) {
1281
1586
            unsigned port = 0;
1282
            // the condition port <= 0xffff prevents integer overflow
1283

8783
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1284
7197
              port = port * 10 + buffer[i] - '0';
1285
1586
            if (port > 0xffff) {
1286
              // TODO(TimothyGu): This hack is currently needed for the host
1287
              // setter since it needs access to hostname if it is valid, and
1288
              // if the FAILED flag is set the entire response to JS layer
1289
              // will be empty.
1290
26
              if (state_override == kHost)
1291
2
                url->port = -1;
1292
              else
1293
24
                url->flags |= URL_FLAGS_FAILED;
1294
26
              return;
1295
            }
1296
            // the port is valid
1297
1560
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1298
1560
            if (url->port == -1)
1299
47
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1300
1560
            buffer.clear();
1301
14
          } else if (has_state_override) {
1302
            // TODO(TimothyGu): Similar case as above.
1303
6
            if (state_override == kHost)
1304
2
              url->port = -1;
1305
            else
1306
4
              url->flags |= URL_FLAGS_TERMINATED;
1307
6
            return;
1308
          }
1309
1568
          state = kPathStart;
1310
1568
          continue;
1311
        } else {
1312
36
          url->flags |= URL_FLAGS_FAILED;
1313
36
          return;
1314
        }
1315
7413
        break;
1316
127190
      case kFile:
1317
127190
        url->scheme = "file:";
1318
127190
        url->host.clear();
1319
127190
        url->flags |= URL_FLAGS_HAS_HOST;
1320

127190
        if (ch == '/' || ch == '\\') {
1321
120626
          state = kFileSlash;
1322

6564
        } else if (has_base && base->scheme == "file:") {
1323

6545
          switch (ch) {
1324
4
            case kEOL:
1325
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1326
4
                url->host = base->host;
1327
              }
1328
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1329
4
                url->flags |= URL_FLAGS_HAS_PATH;
1330
4
                url->path = base->path;
1331
              }
1332
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1333
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1334
4
                url->query = base->query;
1335
              }
1336
4
              break;
1337
4
            case '?':
1338
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1339
4
                url->host = base->host;
1340
              }
1341
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1342
4
                url->flags |= URL_FLAGS_HAS_PATH;
1343
4
                url->path = base->path;
1344
              }
1345
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1346
4
              url->query.clear();
1347
4
              state = kQuery;
1348
4
              break;
1349
4
            case '#':
1350
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1351
4
                url->host = base->host;
1352
              }
1353
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1354
4
                url->flags |= URL_FLAGS_HAS_PATH;
1355
4
                url->path = base->path;
1356
              }
1357
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1358
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1359
4
                url->query = base->query;
1360
              }
1361
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1362
4
              url->fragment.clear();
1363
4
              state = kFragment;
1364
4
              break;
1365
6533
            default:
1366
6533
              url->query.clear();
1367
6533
              if (base->flags & URL_FLAGS_HAS_HOST) {
1368
6533
                url->host = base->host;
1369
              }
1370
6533
              if (base->flags & URL_FLAGS_HAS_PATH) {
1371
6533
                url->flags |= URL_FLAGS_HAS_PATH;
1372
6533
                url->path = base->path;
1373
              }
1374
6533
              if (!StartsWithWindowsDriveLetter(p, end)) {
1375
6509
                ShortenUrlPath(url);
1376
              } else {
1377
24
                url->path.clear();
1378
              }
1379
6533
              state = kPath;
1380
6533
              continue;
1381
          }
1382
        } else {
1383
19
          state = kPath;
1384
19
          continue;
1385
        }
1386
120638
        break;
1387
120626
      case kFileSlash:
1388

120626
        if (ch == '/' || ch == '\\') {
1389
120459
          state = kFileHost;
1390
        } else {
1391

167
          if (has_base && base->scheme == "file:") {
1392
133
            url->flags |= URL_FLAGS_HAS_HOST;
1393
133
            url->host = base->host;
1394

252
            if (!StartsWithWindowsDriveLetter(p, end) &&
1395
119
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1396
4
              url->flags |= URL_FLAGS_HAS_PATH;
1397
4
              url->path.push_back(base->path[0]);
1398
            }
1399
          }
1400
167
          state = kPath;
1401
167
          continue;
1402
        }
1403
120459
        break;
1404
121576
      case kFileHost:
1405

121576
        if (ch == kEOL ||
1406
1115
            ch == '/' ||
1407
1105
            ch == '\\' ||
1408
1105
            ch == '?' ||
1409
            ch == '#') {
1410
120459
          if (!has_state_override &&
1411

240930
              buffer.size() == 2 &&
1412
22
              IsWindowsDriveLetter(buffer)) {
1413
12
            state = kPath;
1414
120459
          } else if (buffer.size() == 0) {
1415
120266
            url->flags |= URL_FLAGS_HAS_HOST;
1416
120266
            url->host.clear();
1417
120266
            if (has_state_override)
1418
4
              return;
1419
120262
            state = kPathStart;
1420
          } else {
1421
193
            std::string host;
1422
193
            if (!ParseHost(buffer, &host, special)) {
1423
52
              url->flags |= URL_FLAGS_FAILED;
1424
52
              return;
1425
            }
1426
141
            if (host == "localhost")
1427
37
              host.clear();
1428
141
            url->flags |= URL_FLAGS_HAS_HOST;
1429
141
            url->host = host;
1430
141
            if (has_state_override)
1431
4
              return;
1432
137
            buffer.clear();
1433
137
            state = kPathStart;
1434
          }
1435
120411
          continue;
1436
        } else {
1437
1105
          buffer += ch;
1438
        }
1439
1105
        break;
1440
199037
      case kPathStart:
1441
199037
        if (IsSpecial(url->scheme)) {
1442
198489
          state = kPath;
1443

198489
          if (ch != '/' && ch != '\\') {
1444
75310
            continue;
1445
          }
1446

548
        } else if (!has_state_override && ch == '?') {
1447
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1448
6
          url->query.clear();
1449
6
          state = kQuery;
1450

542
        } else if (!has_state_override && ch == '#') {
1451
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1452
6
          url->fragment.clear();
1453
6
          state = kFragment;
1454
536
        } else if (ch != kEOL) {
1455
459
          state = kPath;
1456
459
          if (ch != '/') {
1457
35
            continue;
1458
          }
1459

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1460
2
          url->flags |= URL_FLAGS_HAS_PATH;
1461
2
          url->path.emplace_back("");
1462
        }
1463
123692
        break;
1464
12718330
      case kPath:
1465

12718330
        if (ch == kEOL ||
1466
11494625
            ch == '/' ||
1467
11494555
            special_back_slash ||
1468

11494555
            (!has_state_override && (ch == '?' || ch == '#'))) {
1469
1224794
          if (IsDoubleDotSegment(buffer)) {
1470
4420
            ShortenUrlPath(url);
1471

4420
            if (ch != '/' && !special_back_slash) {
1472
280
              url->flags |= URL_FLAGS_HAS_PATH;
1473
280
              url->path.emplace_back("");
1474
            }
1475
1223484
          } else if (IsSingleDotSegment(buffer) &&
1476

1223484
                     ch != '/' && !special_back_slash) {
1477
718
            url->flags |= URL_FLAGS_HAS_PATH;
1478
718
            url->path.emplace_back("");
1479
1219656
          } else if (!IsSingleDotSegment(buffer)) {
1480
2428623
            if (url->scheme == "file:" &&
1481
1372776
                url->path.empty() &&
1482

2590040
                buffer.size() == 2 &&
1483
100
                IsWindowsDriveLetter(buffer)) {
1484
98
              buffer[1] = ':';
1485
            }
1486
1217264
            url->flags |= URL_FLAGS_HAS_PATH;
1487
1217264
            url->path.emplace_back(std::move(buffer));
1488
          }
1489
1224794
          buffer.clear();
1490
2449588
          if (ch == '?') {
1491
964
            url->flags |= URL_FLAGS_HAS_QUERY;
1492
964
            url->query.clear();
1493
964
            state = kQuery;
1494
1223830
          } else if (ch == '#') {
1495
55
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1496
55
            url->fragment.clear();
1497
55
            state = kFragment;
1498
          }
1499
        } else {
1500
11493536
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1501
        }
1502
12718330
        break;
1503
49583
      case kCannotBeBase:
1504
49583
        switch (ch) {
1505
4
          case '?':
1506
4
            state = kQuery;
1507
4
            break;
1508
10
          case '#':
1509
10
            state = kFragment;
1510
10
            break;
1511
49569
          default:
1512
49569
            if (url->path.empty())
1513
              url->path.emplace_back("");
1514
49569
            else if (ch != kEOL)
1515
44496
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1516
        }
1517
49583
        break;
1518
61614
      case kQuery:
1519

61614
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1520
1175
          url->flags |= URL_FLAGS_HAS_QUERY;
1521
1175
          url->query = std::move(buffer);
1522
1175
          buffer.clear();
1523
1564
          if (ch == '#')
1524
389
            state = kFragment;
1525
        } else {
1526
60439
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1527
                                                QUERY_ENCODE_SET_NONSPECIAL);
1528
        }
1529
61614
        break;
1530
4255
      case kFragment:
1531
4255
        switch (ch) {
1532
604
          case kEOL:
1533
604
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1534
604
            url->fragment = std::move(buffer);
1535
604
            break;
1536
3651
          default:
1537
3651
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1538
        }
1539
4255
        break;
1540
      default:
1541
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1542
        return;
1543
    }
1544
1545
14068562
    p++;
1546
  }
1547
}  // NOLINT(readability/fn_size)
1548
1549
// https://url.spec.whatwg.org/#url-serializing
1550
34142
std::string URL::SerializeURL(const url_data& url,
1551
                              bool exclude = false) {
1552
34142
  std::string output;
1553
34142
  output.reserve(
1554
    10 +  // We generally insert < 10 separator characters between URL parts
1555
34142
    url.scheme.size() +
1556
34142
    url.username.size() +
1557
34142
    url.password.size() +
1558
34142
    url.host.size() +
1559
34142
    url.query.size() +
1560
34142
    url.fragment.size() +
1561
34142
    url.href.size() +
1562
34142
    std::accumulate(
1563
        url.path.begin(),
1564
        url.path.end(),
1565
        0,
1566
360624
        [](size_t sum, const auto& str) { return sum + str.size(); }));
1567
1568
34142
  output += url.scheme;
1569
34142
  if (url.flags & URL_FLAGS_HAS_HOST) {
1570
34142
    output += "//";
1571
34142
    if (url.flags & URL_FLAGS_HAS_USERNAME ||
1572
34142
        url.flags & URL_FLAGS_HAS_PASSWORD) {
1573
      if (url.flags & URL_FLAGS_HAS_USERNAME) {
1574
        output += url.username;
1575
      }
1576
      if (url.flags & URL_FLAGS_HAS_PASSWORD) {
1577
        output += ":" + url.password;
1578
      }
1579
      output += "@";
1580
    }
1581
34142
    output += url.host;
1582
34142
    if (url.port != -1) {
1583
      output += ":" + std::to_string(url.port);
1584
    }
1585
  }
1586
34142
  if (url.flags & URL_FLAGS_CANNOT_BE_BASE) {
1587
    output += url.path[0];
1588
  } else {
1589
    if (!(url.flags & URL_FLAGS_HAS_HOST) &&
1590

34142
          url.path.size() > 1 &&
1591
          url.path[0].empty()) {
1592
      output += "/.";
1593
    }
1594
360624
    for (size_t i = 1; i < url.path.size(); i++) {
1595
326482
      output += "/" + url.path[i];
1596
    }
1597
  }
1598
34142
  if (url.flags & URL_FLAGS_HAS_QUERY) {
1599
    output += "?" + url.query;
1600
  }
1601

34142
  if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) {
1602
    output += "#" + url.fragment;
1603
  }
1604
34142
  output.shrink_to_fit();
1605
34142
  return output;
1606
}
1607
1608
namespace {
1609
143680
void SetArgs(Environment* env,
1610
             Local<Value> argv[ARG_COUNT],
1611
             const struct url_data& url) {
1612
143680
  Isolate* isolate = env->isolate();
1613
143680
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1614
287360
  argv[ARG_PROTOCOL] =
1615
143680
      url.flags & URL_FLAGS_SPECIAL ?
1616
137636
          GetSpecial(env, url.scheme) :
1617
6044
          OneByteString(isolate, url.scheme.c_str());
1618
143680
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1619
1224
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1620
143680
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1621
1184
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1622
143680
  if (url.flags & URL_FLAGS_HAS_HOST)
1623
276664
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1624
143680
  if (url.flags & URL_FLAGS_HAS_QUERY)
1625
2366
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1626
143680
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1627
1200
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1628
143680
  if (url.port > -1)
1629
3400
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1630
143680
  if (url.flags & URL_FLAGS_HAS_PATH)
1631
286168
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1632
143680
}
1633
1634
152208
void Parse(Environment* env,
1635
           Local<Value> recv,
1636
           const char* input,
1637
           size_t len,
1638
           enum url_parse_state state_override,
1639
           Local<Value> base_obj,
1640
           Local<Value> context_obj,
1641
           Local<Function> cb,
1642
           Local<Value> error_cb) {
1643
152208
  Isolate* isolate = env->isolate();
1644
152208
  Local<Context> context = env->context();
1645
152208
  HandleScope handle_scope(isolate);
1646
152208
  Context::Scope context_scope(context);
1647
1648
152208
  const bool has_context = context_obj->IsObject();
1649
152208
  const bool has_base = base_obj->IsObject();
1650
1651
152208
  url_data base;
1652
152208
  url_data url;
1653
152208
  if (has_context)
1654
41213
    url = HarvestContext(env, context_obj.As<Object>());
1655
152208
  if (has_base)
1656
8808
    base = HarvestBase(env, base_obj.As<Object>());
1657
1658
152208
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1659

152208
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1660
41213
      ((state_override != kUnknownState) &&
1661
41213
       (url.flags & URL_FLAGS_TERMINATED)))
1662
44
    return;
1663
1664
  // Define the return value placeholders
1665
152164
  const Local<Value> undef = Undefined(isolate);
1666
152164
  const Local<Value> null = Null(isolate);
1667
152164
  if (!(url.flags & URL_FLAGS_FAILED)) {
1668
    Local<Value> argv[] = {
1669
      undef,
1670
      undef,
1671
      undef,
1672
      undef,
1673
      null,  // host defaults to null
1674
      null,  // port defaults to null
1675
      undef,
1676
      null,  // query defaults to null
1677
      null,  // fragment defaults to null
1678
143680
    };
1679
143680
    SetArgs(env, argv, url);
1680
287360
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
1681
8484
  } else if (error_cb->IsFunction()) {
1682
8354
    Local<Value> argv[2] = { undef, undef };
1683
8354
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1684
8354
    argv[ERR_ARG_INPUT] =
1685
16708
      String::NewFromUtf8(env->isolate(), input).ToLocalChecked();
1686
8354
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
1687
8354
        .FromMaybe(Local<Value>());
1688
  }
1689
}
1690
1691
152208
void Parse(const FunctionCallbackInfo<Value>& args) {
1692
152208
  Environment* env = Environment::GetCurrent(args);
1693
152208
  CHECK_GE(args.Length(), 5);
1694
304416
  CHECK(args[0]->IsString());  // input
1695


413266
  CHECK(args[2]->IsUndefined() ||  // base context
1696
        args[2]->IsNull() ||
1697
        args[2]->IsObject());
1698


428055
  CHECK(args[3]->IsUndefined() ||  // context
1699
        args[3]->IsNull() ||
1700
        args[3]->IsObject());
1701
152208
  CHECK(args[4]->IsFunction());  // complete callback
1702

415411
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1703
1704
152208
  Utf8Value input(env->isolate(), args[0]);
1705
152208
  enum url_parse_state state_override = kUnknownState;
1706
152208
  if (args[1]->IsNumber()) {
1707
152208
    state_override = static_cast<enum url_parse_state>(
1708
304416
        args[1]->Uint32Value(env->context()).FromJust());
1709
  }
1710
1711
304416
  Parse(env, args.This(),
1712
152208
        *input, input.length(),
1713
        state_override,
1714
        args[2],
1715
        args[3],
1716
304416
        args[4].As<Function>(),
1717
        args[5]);
1718
152208
}
1719
1720
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1721
92
  Environment* env = Environment::GetCurrent(args);
1722
92
  CHECK_GE(args.Length(), 1);
1723
184
  CHECK(args[0]->IsString());
1724
184
  Utf8Value value(env->isolate(), args[0]);
1725
92
  std::string output;
1726
92
  size_t len = value.length();
1727
92
  output.reserve(len);
1728
756
  for (size_t n = 0; n < len; n++) {
1729
664
    const char ch = (*value)[n];
1730
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1731
  }
1732
276
  args.GetReturnValue().Set(
1733
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1734
92
}
1735
1736
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1737
229
  Environment* env = Environment::GetCurrent(args);
1738
229
  CHECK_GE(args.Length(), 1);
1739
458
  CHECK(args[0]->IsString());
1740
229
  Utf8Value value(env->isolate(), args[0]);
1741
1742
229
  URLHost host;
1743
  // Assuming the host is used for a special scheme.
1744
229
  host.ParseHost(*value, value.length(), true);
1745
229
  if (host.ParsingFailed()) {
1746
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1747
12
    return;
1748
  }
1749
217
  std::string out = host.ToStringMove();
1750
651
  args.GetReturnValue().Set(
1751
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1752
}
1753
1754
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1755
207
  Environment* env = Environment::GetCurrent(args);
1756
207
  CHECK_GE(args.Length(), 1);
1757
414
  CHECK(args[0]->IsString());
1758
207
  Utf8Value value(env->isolate(), args[0]);
1759
1760
207
  URLHost host;
1761
  // Assuming the host is used for a special scheme.
1762
207
  host.ParseHost(*value, value.length(), true, true);
1763
207
  if (host.ParsingFailed()) {
1764
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1765
12
    return;
1766
  }
1767
195
  std::string out = host.ToStringMove();
1768
585
  args.GetReturnValue().Set(
1769
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1770
}
1771
1772
626
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1773
626
  Environment* env = Environment::GetCurrent(args);
1774
626
  CHECK_EQ(args.Length(), 1);
1775
626
  CHECK(args[0]->IsFunction());
1776
1252
  env->set_url_constructor_function(args[0].As<Function>());
1777
626
}
1778
1779
626
void Initialize(Local<Object> target,
1780
                Local<Value> unused,
1781
                Local<Context> context,
1782
                void* priv) {
1783
626
  Environment* env = Environment::GetCurrent(context);
1784
626
  env->SetMethod(target, "parse", Parse);
1785
626
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1786
626
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1787
626
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1788
626
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1789
1790
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1791
16902
  FLAGS(XX)
1792
#undef XX
1793
1794
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1795
26292
  PARSESTATES(XX)
1796
#undef XX
1797
626
}
1798
}  // namespace
1799
1800
4958
// Registers every native callback exposed by this binding with `registry`,
// in the same order they are listed here.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  const v8::FunctionCallback callbacks[] = {
    Parse,
    EncodeAuthSet,
    DomainToASCII,
    DomainToUnicode,
    SetURLConstructor,
  };
  for (const v8::FunctionCallback callback : callbacks) {
    registry->Register(callback);
  }
}
1807
1808
8
std::string URL::ToFilePath() const {
1809
8
  if (context_.scheme != "file:") {
1810
1
    return "";
1811
  }
1812
1813
#ifdef _WIN32
1814
  const char* slash = "\\";
1815
  auto is_slash = [] (char ch) {
1816
    return ch == '/' || ch == '\\';
1817
  };
1818
#else
1819
7
  const char* slash = "/";
1820
46
  auto is_slash = [] (char ch) {
1821
46
    return ch == '/';
1822
  };
1823

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1824
7
      context_.host.length() > 0) {
1825
1
    return "";
1826
  }
1827
#endif
1828
12
  std::string decoded_path;
1829
18
  for (const std::string& part : context_.path) {
1830
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1831
58
    for (char& ch : decoded) {
1832
46
      if (is_slash(ch)) {
1833
1
        return "";
1834
      }
1835
    }
1836
12
    decoded_path += slash + decoded;
1837
  }
1838
1839
#ifdef _WIN32
1840
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1841
1842
  // If hostname is set, then we have a UNC path. Pass the hostname through
1843
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1844
  // need to worry about percent encoding because the URL parser will have
1845
  // already taken care of that for us. Note that this only causes IDNs with an
1846
  // appropriate `xn--` prefix to be decoded.
1847
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1848
      context_.host.length() > 0) {
1849
    std::string unicode_host;
1850
    if (!ToUnicode(context_.host, &unicode_host)) {
1851
      return "";
1852
    }
1853
    return "\\\\" + unicode_host + decoded_path;
1854
  }
1855
  // Otherwise, it's a local path that requires a drive letter.
1856
  if (decoded_path.length() < 3) {
1857
    return "";
1858
  }
1859
  if (decoded_path[2] != ':' ||
1860
      !IsASCIIAlpha(decoded_path[1])) {
1861
    return "";
1862
  }
1863
  // Strip out the leading '\'.
1864
  return decoded_path.substr(1);
1865
#else
1866
5
  return decoded_path;
1867
#endif
1868
}
1869
1870
34142
// Builds a file: URL from `file_path`. Every '%' in the path is first
// rewritten to "%25"; the result is then parsed, starting in the kPathStart
// state, into a URL that was constructed from "file://".
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped_file_path;
  for (const char ch : file_path) {
    escaped_file_path += ch;
    if (ch == '%') {
      // The '%' was just appended; "25" completes the "%25" escape.
      escaped_file_path += "25";
    }
  }

  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1882
1883
// This function works by calling out to a JS function that creates and
1884
// returns the JS URL object. Be mindful of the JS<->Native boundary
1885
// crossing that is required.
1886
MaybeLocal<Value> URL::ToObject(Environment* env) const {
1887
  Isolate* isolate = env->isolate();
1888
  Local<Context> context = env->context();
1889
  Context::Scope context_scope(context);
1890
1891
  const Local<Value> undef = Undefined(isolate);
1892
  const Local<Value> null = Null(isolate);
1893
1894
  if (context_.flags & URL_FLAGS_FAILED)
1895
    return Local<Value>();
1896
1897
  Local<Value> argv[] = {
1898
    undef,
1899
    undef,
1900
    undef,
1901
    undef,
1902
    null,  // host defaults to null
1903
    null,  // port defaults to null
1904
    undef,
1905
    null,  // query defaults to null
1906
    null,  // fragment defaults to null
1907
  };
1908
  SetArgs(env, argv, context_);
1909
1910
  MaybeLocal<Value> ret;
1911
  {
1912
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
1913
1914
    // The SetURLConstructor method must have been called already to
1915
    // set the constructor function used below. SetURLConstructor is
1916
    // called automatically when the internal/url.js module is loaded
1917
    // during the internal/bootstrap/node.js processing.
1918
    ret = env->url_constructor_function()
1919
        ->Call(env->context(), undef, arraysize(argv), argv);
1920
  }
1921
1922
  return ret;
1923
}
1924
1925
}  // namespace url
1926
}  // namespace node
1927
1928
5019
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1929
4958
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)