GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1128 1186 95.1 %
Date: 2022-03-16 03:14:48 Branches: 970 1096 88.5 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <numeric>
11
#include <string>
12
#include <vector>
13
14
namespace node {
15
16
using errors::TryCatchScope;
17
18
using url::table_data::hex;
19
using url::table_data::C0_CONTROL_ENCODE_SET;
20
using url::table_data::FRAGMENT_ENCODE_SET;
21
using url::table_data::PATH_ENCODE_SET;
22
using url::table_data::USERINFO_ENCODE_SET;
23
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
24
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
25
26
using v8::Array;
27
using v8::Context;
28
using v8::Function;
29
using v8::FunctionCallbackInfo;
30
using v8::HandleScope;
31
using v8::Int32;
32
using v8::Integer;
33
using v8::Isolate;
34
using v8::Local;
35
using v8::MaybeLocal;
36
using v8::NewStringType;
37
using v8::Null;
38
using v8::Object;
39
using v8::String;
40
using v8::Undefined;
41
using v8::Value;
42
43
142435
// Builds a V8 string from the bytes of `str`, interpreted as UTF-8.
// The result is unwrapped with ToLocalChecked(), so an empty MaybeLocal is
// not handled here.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
49
50
namespace url {
51
namespace {
52
53
// Sentinel character used to represent "end of input" while scanning.
// https://url.spec.whatwg.org/#eof-code-point
constexpr char kEOL = static_cast<char>(-1);
55
56
// A parsed URL host: https://url.spec.whatwg.org/#concept-host
//
// A freshly constructed URLHost is in the "failed" state. The Parse*()
// methods switch it to a concrete host type when they succeed.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True as long as no host value has been stored.
  bool ParsingFailed() const { return HostType::H_FAILED == type_; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage shared by all host types. `type_` records which member is live.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    // The string member is constructed and destroyed explicitly by URLHost
    // (see StoreString() and Reset()), so neither special member touches it.
    Value() : ipv4(0) {}
    ~Value() {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the live one, then returns to the
  // failed state.
  void Reset() {
    if (type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE) {
      using string = std::string;
      value_.domain_or_opaque.~string();
    }
    type_ = HostType::H_FAILED;
  }

  // Setting the string member of the union with = is brittle because it
  // relies on the member already being in a state that requires no
  // destruction of old data.
  // For a long time, that worked well enough because ParseIPv6Host() happens
  // to zero-fill `value_`, but that really is relying on standard library
  // internals too much.
  // Going through Reset() plus placement new is the easiest solution, but we
  // might want to consider just not forcing strings into a union.
  void StoreString(HostType new_type, std::string&& text) {
    Reset();
    type_ = new_type;
    new (&value_.domain_or_opaque) std::string(std::move(text));
  }

  void SetOpaque(std::string&& string) {
    StoreString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    StoreString(HostType::H_DOMAIN, std::move(string));
  }
};
128
129
5258
// All cleanup is delegated to Reset().
URLHost::~URLHost() { Reset(); }
132
133
// X-macro listing every url_cb_args enumerator, in declaration order.
#define ARGS(V)                                                               \
  V(ARG_FLAGS)                                                                \
  V(ARG_PROTOCOL)                                                             \
  V(ARG_USERNAME)                                                             \
  V(ARG_PASSWORD)                                                             \
  V(ARG_HOST)                                                                 \
  V(ARG_PORT)                                                                 \
  V(ARG_PATH)                                                                 \
  V(ARG_QUERY)                                                                \
  V(ARG_FRAGMENT)                                                             \
  V(ARG_COUNT)  // This one has to be last.

// Enumerators are numbered from 0 in the order given by ARGS, so ARG_COUNT
// equals the number of entries that precede it.
enum url_cb_args {
#define URL_CB_ARG_ENTRY(name) name,
  ARGS(URL_CB_ARG_ENTRY)
#undef URL_CB_ARG_ENTRY
};
150
151
// Defines two overloads of a predicate called `name`:
//  - name(ch1, ch2) evaluates `expr`, which may refer to `ch1` and `ch2`;
//  - name(str) applies the first overload to the first two characters of a
//    std::basic_string and is false for strings shorter than two characters.
// `bits` is only used for the static_assert on the character width.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  bool name(const T ch1, const T ch2) {                                       \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  bool name(const std::basic_string<T>& str) {                                \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be wider than " #bits " bits");             \
    if (str.length() < 2)                                                     \
      return false;                                                           \
    return name(str[0], str[1]);                                              \
  }
164
165
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
166

13840457
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
167
168
// https://infra.spec.whatwg.org/#c0-control-or-space
169

291586
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
170
171
// https://infra.spec.whatwg.org/#ascii-digit
172

566109
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))
173
174
// https://infra.spec.whatwg.org/#ascii-hex-digit
175


1078
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
176
                               (ch >= 'A' && ch <= 'F') ||
177
                               (ch >= 'a' && ch <= 'f')))
178
179
// https://infra.spec.whatwg.org/#ascii-alpha
180


1253128
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
181
                            (ch >= 'a' && ch <= 'z')))
182
183
// https://infra.spec.whatwg.org/#ascii-alphanumeric
184

550248
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))
185
186
// https://infra.spec.whatwg.org/#ascii-lowercase
// ASCII letters get bit 0x20 set, which maps 'A'-'Z' onto 'a'-'z' and leaves
// lowercase letters as they are. Any other value is returned unchanged.
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}
191
192
// https://url.spec.whatwg.org/#forbidden-host-code-point
193









90746
CHAR_TEST(8, IsForbiddenHostCodePoint,
194
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
195
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
196
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
197
          ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
198
          ch == '^' || ch == '|')
199
200
// https://url.spec.whatwg.org/#windows-drive-letter
201

12488
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
202
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))
203
204
// https://url.spec.whatwg.org/#normalized-windows-drive-letter
205

2692
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
206
                     (IsASCIIAlpha(ch1) && ch2 == ':'))
207
208
#undef TWO_CHAR_STRING_TEST
209
210
11654502
// Returns whether bit number `i` of the bitmap `a` is set. Bits are numbered
// starting from the least significant bit of a[0].
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t containing_byte = a[i / 8];
  const unsigned mask = 1u << (i % 8);
  return (containing_byte & mask) != 0;
}
213
214
// Appends ch to str. If ch position in encode_set is set, the ch will
215
// be percent-encoded then appended.
216
11654502
void AppendOrEscape(std::string* str,
217
                    const unsigned char ch,
218
                    const uint8_t encode_set[]) {
219
11654502
  if (BitAt(encode_set, ch))
220
1926
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
221
  else
222
11652576
    *str += ch;
223
11654502
}
224
225
template <typename T>
226
850
unsigned hex2bin(const T ch) {
227

850
  if (ch >= '0' && ch <= '9')
228
546
    return ch - '0';
229

304
  if (ch >= 'A' && ch <= 'F')
230
172
    return 10 + (ch - 'A');
231

132
  if (ch >= 'a' && ch <= 'f')
232
132
    return 10 + (ch - 'a');
233
  UNREACHABLE();
234
}
235
236
4386
std::string PercentDecode(const char* input, size_t len) {
237
4386
  std::string dest;
238
4386
  if (len == 0)
239
2
    return dest;
240
4384
  dest.reserve(len);
241
4384
  const char* pointer = input;
242
4384
  const char* end = input + len;
243
244
93680
  while (pointer < end) {
245
89296
    const char ch = pointer[0];
246
89296
    size_t remaining = end - pointer - 1;
247


89733
    if (ch != '%' || remaining < 2 ||
248
437
        (ch == '%' &&
249
437
         (!IsASCIIHexDigit(pointer[1]) ||
250
433
          !IsASCIIHexDigit(pointer[2])))) {
251
88871
      dest += ch;
252
88871
      pointer++;
253
88871
      continue;
254
    } else {
255
425
      unsigned a = hex2bin(pointer[1]);
256
425
      unsigned b = hex2bin(pointer[2]);
257
425
      char c = static_cast<char>(a * 16 + b);
258
425
      dest += c;
259
425
      pointer += 3;
260
    }
261
  }
262
4384
  return dest;
263
}
264
265
// X-macro over the special schemes. Each entry is
//   (identifier, port, scheme string including the trailing ':').
// "file" carries -1 as its port value.
#define SPECIALS(ENTRY)                                                       \
  ENTRY(ftp, 21, "ftp:")                                                      \
  ENTRY(file, -1, "file:")                                                    \
  ENTRY(http, 80, "http:")                                                    \
  ENTRY(https, 443, "https:")                                                 \
  ENTRY(ws, 80, "ws:")                                                        \
  ENTRY(wss, 443, "wss:")
272
273
331579
bool IsSpecial(const std::string& scheme) {
274
#define V(_, __, name) if (scheme == name) return true;
275



331579
  SPECIALS(V);
276
#undef V
277
6525
  return false;
278
}
279
280
138752
// Returns the Environment's url_special_<key>_string() for the SPECIALS entry
// whose scheme string equals `scheme`. A scheme that matches no entry reaches
// UNREACHABLE().
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define V(key, _, name)                                                       \
  if (scheme == name) {                                                       \
    return env->url_special_##key##_string();                                 \
  }
  SPECIALS(V)
#undef V
  UNREACHABLE();
}
287
288
132361
int NormalizePort(const std::string& scheme, int p) {
289
#define V(_, port, name) if (scheme == name && p == port) return -1;
290









132361
  SPECIALS(V);
291
#undef V
292
11255
  return p;
293
}
294
295
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
296
6704
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
297
6704
  size_t length = end - p;
298
6000
  return length >= 2 &&
299

12740
    IsWindowsDriveLetter(p[0], p[1]) &&
300
36
    (length == 2 ||
301
36
      p[2] == '/' ||
302
14
      p[2] == '\\' ||
303
6
      p[2] == '?' ||
304
6708
      p[2] == '#');
305
}
306
307
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs `input` through i18n::ToUnicode() and stores the result in `*output`.
// Returns false, leaving `*output` untouched, when the conversion reports a
// negative status.
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> buf;
  const auto status = i18n::ToUnicode(&buf, input.c_str(), input.length());
  if (status < 0)
    return false;
  output->assign(*buf, buf.length());
  return true;
}

// Runs `input` through i18n::ToASCII() and stores the result in `*output`.
// Returns false, leaving `*output` untouched, when the conversion reports a
// negative status or produces an empty result.
bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> buf;
  const auto status = i18n::ToASCII(&buf, input.c_str(), input.length());
  if (status < 0 || buf.length() == 0)
    return false;
  output->assign(*buf, buf.length());
  return true;
}
#else  // !defined(NODE_HAVE_I18N_SUPPORT)
// Intentional non-ops if ICU is not present: the input is copied through
// unchanged and the call always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
#endif  // !defined(NODE_HAVE_I18N_SUPPORT)
337
338
#define NS_IN6ADDRSZ 16
339
340
357
void URLHost::ParseIPv6Host(const char* input, size_t length) {
341
357
  CHECK_EQ(type_, HostType::H_FAILED);
342
343
  unsigned char buf[sizeof(struct in6_addr)];
344
357
  MaybeStackBuffer<char> ipv6(length + 1);
345
357
  *(*ipv6 + length) = 0;
346
357
  memset(buf, 0, sizeof(buf));
347
357
  memcpy(*ipv6, input, sizeof(const char) * length);
348
349
357
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
350
351
357
  if (ret != 0) {
352
92
    return;
353
  }
354
355
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
356
2385
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
357
2120
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
358
  }
359
360
265
  type_ = HostType::H_IPV6;
361
}
362
363
4923
int64_t ParseNumber(const char* start, const char* end) {
364
4923
  unsigned R = 10;
365

4923
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
366
48
    start += 2;
367
48
    R = 16;
368
  }
369
4923
  if (end - start == 0) {
370
8
    return 0;
371

4915
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
372
55
    start++;
373
55
    R = 8;
374
  }
375
4915
  const char* p = start;
376
377
7430
  while (p < end) {
378
6216
    const char ch = p[0];
379

6216
    switch (R) {
380
274
      case 8:
381

274
        if (ch < '0' || ch > '7')
382
29
          return -1;
383
245
        break;
384
5734
      case 10:
385
5734
        if (!IsASCIIDigit(ch))
386
3668
          return -1;
387
2066
        break;
388
208
      case 16:
389
208
        if (!IsASCIIHexDigit(ch))
390
4
          return -1;
391
204
        break;
392
    }
393
2515
    p++;
394
  }
395
1214
  return strtoll(start, nullptr, R);
396
}
397
398
4050
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
399
4050
  CHECK_EQ(type_, HostType::H_FAILED);
400
4050
  *is_ipv4 = false;
401
4050
  const char* pointer = input;
402
4050
  const char* mark = input;
403
4050
  const char* end = pointer + length;
404
4050
  int parts = 0;
405
4050
  uint32_t val = 0;
406
  uint64_t numbers[4];
407
4050
  int tooBigNumbers = 0;
408
4050
  if (length == 0)
409
3745
    return;
410
411
37774
  while (pointer <= end) {
412
37445
    const char ch = pointer < end ? pointer[0] : kEOL;
413
37445
    int64_t remaining = end - pointer - 1;
414

37445
    if (ch == '.' || ch == kEOL) {
415
4939
      if (++parts > static_cast<int>(arraysize(numbers)))
416
4
        return;
417
4935
      if (pointer == mark)
418
12
        return;
419
4923
      int64_t n = ParseNumber(mark, pointer);
420
4923
      if (n < 0)
421
3701
        return;
422
423
1222
      if (n > 255) {
424
112
        tooBigNumbers++;
425
      }
426
1222
      numbers[parts - 1] = n;
427
1222
      mark = pointer + 1;
428

1222
      if (ch == '.' && remaining == 0)
429
4
        break;
430
    }
431
33724
    pointer++;
432
  }
433
333
  CHECK_GT(parts, 0);
434
333
  *is_ipv4 = true;
435
436
  // If any but the last item in numbers is greater than 255, return failure.
437
  // If the last item in numbers is greater than or equal to
438
  // 256^(5 - the number of items in numbers), return failure.
439
329
  if (tooBigNumbers > 1 ||
440

722
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
441
325
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
442
28
    return;
443
  }
444
445
305
  type_ = HostType::H_IPV4;
446
305
  val = static_cast<uint32_t>(numbers[parts - 1]);
447
1096
  for (int n = 0; n < parts - 1; n++) {
448
791
    double b = 3 - n;
449
791
    val +=
450
791
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
451
  }
452
453
305
  value_.ipv4 = val;
454
}
455
456
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
457
520
  CHECK_EQ(type_, HostType::H_FAILED);
458
520
  std::string output;
459
520
  output.reserve(length);
460
3053
  for (size_t i = 0; i < length; i++) {
461
2595
    const char ch = input[i];
462

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
463
62
      return;
464
    } else {
465
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
466
    }
467
  }
468
469
458
  SetOpaque(std::move(output));
470
}
471
472
5258
void URLHost::ParseHost(const char* input,
473
                        size_t length,
474
                        bool is_special,
475
                        bool unicode) {
476
5258
  CHECK_EQ(type_, HostType::H_FAILED);
477
5258
  const char* pointer = input;
478
479
5258
  if (length == 0)
480
1541
    return;
481
482
5258
  if (pointer[0] == '[') {
483
365
    if (pointer[length - 1] != ']')
484
8
      return;
485
357
    return ParseIPv6Host(++pointer, length - 2);
486
  }
487
488
4893
  if (!is_special)
489
520
    return ParseOpaqueHost(input, length);
490
491
  // First, we have to percent decode
492
4373
  std::string decoded = PercentDecode(input, length);
493
494
  // Then we have to punycode toASCII
495
4373
  if (!ToASCII(decoded, &decoded))
496
148
    return;
497
498
  // If any of the following characters are still present, we have to fail
499
92227
  for (size_t n = 0; n < decoded.size(); n++) {
500
88177
    const char ch = decoded[n];
501
88177
    if (IsForbiddenHostCodePoint(ch)) {
502
175
      return;
503
    }
504
  }
505
506
  // Check to see if it's an IPv4 IP address
507
  bool is_ipv4;
508
4050
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
509
4050
  if (is_ipv4)
510
333
    return;
511
512
  // If the unicode flag is set, run the result through punycode ToUnicode
513

3717
  if (unicode && !ToUnicode(decoded, &decoded))
514
    return;
515
516
  // It's not an IPv4 or IPv6 address, it must be a domain
517
3717
  SetDomain(std::move(decoded));
518
}
519
520
// Locates the longest sequence of 0 segments in an IPv6 address
// in order to use the :: compression when serializing.
// Returns a pointer to the first element of the longest run of zeros among
// the `len` elements at `values`. Only runs of at least two zeros count, and
// the earliest run wins a tie. Returns nullptr when no such run exists.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best_start = nullptr;
  unsigned best_length = 1;  // A run has to beat this, i.e. be 2 or longer.

  T* run_start = nullptr;
  unsigned run_length = 0;

  for (size_t i = 0; i < len; i++) {
    T* cursor = values + i;
    if (*cursor == 0) {
      if (run_start == nullptr)
        run_start = cursor;
      run_length++;
      continue;
    }
    // A non-zero element ends the current run, if there is one.
    if (run_length > best_length) {
      best_length = run_length;
      best_start = run_start;
    }
    run_length = 0;
    run_start = nullptr;
  }

  // Account for a run that extends to the end of the input.
  if (run_length > best_length)
    best_start = run_start;
  return best_start;
}
550
551
4745
std::string URLHost::ToStringMove() {
552
4745
  std::string return_value;
553
4745
  switch (type_) {
554
4175
    case HostType::H_DOMAIN:
555
    case HostType::H_OPAQUE:
556
4175
      return_value = std::move(value_.domain_or_opaque);
557
4175
      break;
558
570
    default:
559
570
      return_value = ToString();
560
570
      break;
561
  }
562
4745
  Reset();
563
4745
  return return_value;
564
}
565
566
570
std::string URLHost::ToString() const {
567
1140
  std::string dest;
568

570
  switch (type_) {
569
    case HostType::H_DOMAIN:
570
    case HostType::H_OPAQUE:
571
      return value_.domain_or_opaque;
572
305
    case HostType::H_IPV4: {
573
305
      dest.reserve(15);
574
305
      uint32_t value = value_.ipv4;
575
1525
      for (int n = 0; n < 4; n++) {
576
1220
        dest.insert(0, std::to_string(value % 256));
577
1220
        if (n < 3)
578
915
          dest.insert(0, 1, '.');
579
1220
        value /= 256;
580
      }
581
305
      break;
582
    }
583
265
    case HostType::H_IPV6: {
584
265
      dest.reserve(41);
585
265
      dest += '[';
586
265
      const uint16_t* start = &value_.ipv6[0];
587
      const uint16_t* compress_pointer =
588
265
          FindLongestZeroSequence(start, 8);
589
265
      bool ignore0 = false;
590
2385
      for (int n = 0; n <= 7; n++) {
591
2120
        const uint16_t* piece = &value_.ipv6[n];
592

2120
        if (ignore0 && *piece == 0)
593
1785
          continue;
594
596
        else if (ignore0)
595
255
          ignore0 = false;
596
596
        if (compress_pointer == piece) {
597
261
          dest += n == 0 ? "::" : ":";
598
261
          ignore0 = true;
599
261
          continue;
600
        }
601
        char buf[5];
602
335
        snprintf(buf, sizeof(buf), "%x", *piece);
603
335
        dest += buf;
604
335
        if (n < 7)
605
76
          dest += ':';
606
      }
607
265
      dest += ']';
608
265
      break;
609
    }
610
    case HostType::H_FAILED:
611
      break;
612
  }
613
570
  return dest;
614
}
615
616
4916
bool ParseHost(const std::string& input,
617
               std::string* output,
618
               bool is_special,
619
               bool unicode = false) {
620
4916
  if (input.empty()) {
621
94
    output->clear();
622
94
    return true;
623
  }
624
9644
  URLHost host;
625
4822
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
626
4822
  if (host.ParsingFailed())
627
489
    return false;
628
4333
  *output = host.ToStringMove();
629
4333
  return true;
630
}
631
632
8846
std::vector<std::string> FromJSStringArray(Environment* env,
633
                                           Local<Array> array) {
634
8846
  std::vector<std::string> vec;
635
8846
  if (array->Length() > 0)
636
8830
    vec.reserve(array->Length());
637
134236
  for (size_t n = 0; n < array->Length(); n++) {
638
116544
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
639
116544
    if (val->IsString()) {
640
58272
      Utf8Value value(env->isolate(), val.As<String>());
641
58272
      vec.emplace_back(*value, value.length());
642
    }
643
  }
644
8846
  return vec;
645
}
646
647
8846
// Builds a url_data from the properties of the JS object `base_obj`.
// Properties are read in this order: flags, port, scheme, username, password,
// host, query, fragment, path. Values of an unexpected type are ignored,
// except for the scheme, which is always converted with Utf8Value.
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  auto get_property = [&](Local<String> key) {
    return base_obj->Get(context, key).ToLocalChecked();
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  Local<Value> scheme = get_property(env->scheme_string());
  base.scheme = Utf8Value(env->isolate(), scheme).out();

  // Copies a string property into `base.*member` and sets `flag`. With
  // `empty_as_present` unset, an empty string is copied but does not set the
  // flag.
  auto read_string = [&](std::string url_data::*member,
                         int flag,
                         Local<String> name,
                         bool empty_as_present) {
    Local<Value> value = get_property(name);
    if (!value->IsString())
      return;
    Utf8Value utf8value(env->isolate(), value.As<String>());
    (base.*member).assign(*utf8value, utf8value.length());
    if (empty_as_present || value.As<String>()->Length() != 0)
      base.flags |= flag;
  };

  read_string(&url_data::username,
              URL_FLAGS_HAS_USERNAME,
              env->username_string(),
              false);
  read_string(&url_data::password,
              URL_FLAGS_HAS_PASSWORD,
              env->password_string(),
              false);
  read_string(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
  read_string(&url_data::query,
              URL_FLAGS_HAS_QUERY,
              env->query_string(),
              true);
  read_string(&url_data::fragment,
              URL_FLAGS_HAS_FRAGMENT,
              env->fragment_string(),
              true);

  Local<Value> path = get_property(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
701
702
41730
// Builds a url_data from the JS object `context_obj`, copying only the
// flags in kCopyFlagsMask plus scheme, port, username, password and host.
// Username and password are read only when their HAS_* flag was copied, and
// are then required (CHECK) to be strings.
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;
  Local<Context> js_context = env->context();

  auto get_property = [&](Local<String> key) {
    return context_obj->Get(js_context, key).ToLocalChecked();
  };
  auto copy_utf8 = [&](Local<Value> source, std::string* target) {
    Utf8Value value(env->isolate(), source);
    target->assign(*value, value.length());
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = get_property(env->scheme_string());
  if (scheme->IsString())
    copy_utf8(scheme, &context.scheme);

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = get_property(env->username_string());
    CHECK(username->IsString());
    copy_utf8(username, &context.username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = get_property(env->password_string());
    CHECK(password->IsString());
    copy_utf8(password, &context.password);
  }

  Local<Value> host = get_property(env->host_string());
  if (host->IsString())
    copy_utf8(host, &context.host);

  return context;
}
750
751
// Single dot segment can be ".", "%2e", or "%2E"
752
2449440
bool IsSingleDotSegment(const std::string& str) {
753
2449440
  switch (str.size()) {
754
6980
    case 1:
755
6980
      return str == ".";
756
146226
    case 3:
757
146226
      return str[0] == '%' &&
758

146272
             str[1] == '2' &&
759
146272
             ASCIILowercase(str[2]) == 'e';
760
2296234
    default:
761
2296234
      return false;
762
  }
763
}
764
765
// Double dot segment can be:
766
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
767
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
768
1229526
bool IsDoubleDotSegment(const std::string& str) {
769

1229526
  switch (str.size()) {
770
4994
    case 2:
771
4994
      return str == "..";
772
346903
    case 4:
773

346903
      if (str[0] != '.' && str[0] != '%')
774
346884
        return false;
775
19
      return ((str[0] == '.' &&
776
13
               str[1] == '%' &&
777

8
               str[2] == '2' &&
778
42
               ASCIILowercase(str[3]) == 'e') ||
779
15
              (str[0] == '%' &&
780

12
               str[1] == '2' &&
781
6
               ASCIILowercase(str[2]) == 'e' &&
782
25
               str[3] == '.'));
783
69333
    case 6:
784
69333
      return (str[0] == '%' &&
785

24
              str[1] == '2' &&
786
12
              ASCIILowercase(str[2]) == 'e' &&
787
4
              str[3] == '%' &&
788

69349
              str[4] == '2' &&
789
69337
              ASCIILowercase(str[5]) == 'e');
790
808296
    default:
791
808296
      return false;
792
  }
793
}
794
795
11173
void ShortenUrlPath(struct url_data* url) {
796
11173
  if (url->path.empty()) return;
797


11367
  if (url->path.size() == 1 && url->scheme == "file:" &&
798
590
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
799
10777
  url->path.pop_back();
800
}
801
802
}  // anonymous namespace
803
804
221625
void URL::Parse(const char* input,
805
                size_t len,
806
                enum url_parse_state state_override,
807
                struct url_data* url,
808
                bool has_url,
809
                const struct url_data* base,
810
                bool has_base) {
811
221625
  const char* p = input;
812
221625
  const char* end = input + len;
813
814
221625
  if (!has_url) {
815
145816
    for (const char* ptr = p; ptr < end; ptr++) {
816
145797
      if (IsC0ControlOrSpace(*ptr))
817
56
        p++;
818
      else
819
145741
        break;
820
    }
821
145808
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
822
145789
      if (IsC0ControlOrSpace(*ptr))
823
48
        end--;
824
      else
825
145741
        break;
826
    }
827
145760
    input = p;
828
145760
    len = end - p;
829
  }
830
831
  // The spec says we should strip out any ASCII tabs or newlines.
832
  // In those cases, we create another std::string instance with the filtered
833
  // contents, but in the general case we avoid the overhead.
834
221625
  std::string whitespace_stripped;
835
14061059
  for (const char* ptr = p; ptr < end; ptr++) {
836
13839604
    if (!IsASCIITabOrNewline(*ptr))
837
13839434
      continue;
838
    // Hit tab or newline. Allocate storage, copy what we have until now,
839
    // and then iterate and filter all similar characters out.
840
170
    whitespace_stripped.reserve(len - 1);
841
170
    whitespace_stripped.assign(p, ptr - p);
842
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
843
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
844
853
      if (!IsASCIITabOrNewline(*ptr))
845
769
        whitespace_stripped += *ptr;
846
    }
847
848
    // Update variables like they should have looked like if the string
849
    // had been stripped of whitespace to begin with.
850
170
    input = whitespace_stripped.c_str();
851
170
    len = whitespace_stripped.size();
852
170
    p = input;
853
170
    end = input + len;
854
170
    break;
855
  }
856
857
221625
  bool atflag = false;  // Set when @ has been seen.
858
221625
  bool square_bracket_flag = false;  // Set inside of [...]
859
221625
  bool password_token_seen_flag = false;  // Set after a : after an username.
860
861
221625
  std::string buffer;
862
863
  // Set the initial parse state.
864
221625
  const bool has_state_override = state_override != kUnknownState;
865
221625
  enum url_parse_state state = has_state_override ? state_override :
866
                                                    kSchemeStart;
867
868

221625
  if (state < kSchemeStart || state > kFragment) {
869
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
870
    return;
871
  }
872
873
14574733
  while (p <= end) {
874
14361918
    const char ch = p < end ? p[0] : kEOL;
875
14361918
    bool special = (url->flags & URL_FLAGS_SPECIAL);
876
    bool cannot_be_base;
877

14361918
    bool special_back_slash = (special && ch == '\\');
878
879





14361918
    switch (state) {
880
145843
      case kSchemeStart:
881
145843
        if (IsASCIIAlpha(ch)) {
882
132847
          buffer += ASCIILowercase(ch);
883
132847
          state = kScheme;
884
12996
        } else if (!has_state_override) {
885
12986
          state = kNoScheme;
886
12986
          continue;
887
        } else {
888
10
          url->flags |= URL_FLAGS_FAILED;
889
10
          return;
890
        }
891
132847
        break;
892
550248
      case kScheme:
893


550248
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
894
417401
          buffer += ASCIILowercase(ch);
895

132847
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
896

130833
          if (has_state_override && buffer.size() == 0) {
897
            url->flags |= URL_FLAGS_TERMINATED;
898
            return;
899
          }
900
130833
          buffer += ':';
901
902
130833
          bool new_is_special = IsSpecial(buffer);
903
904
130833
          if (has_state_override) {
905
45
            if ((special != new_is_special) ||
906
45
                ((buffer == "file:") &&
907
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
908
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
909


116
                  (url->port != -1))) ||
910
45
                  (url->scheme == "file:" && url->host.empty())) {
911
32
              url->flags |= URL_FLAGS_TERMINATED;
912
32
              return;
913
            }
914
          }
915
916
130801
          url->scheme = std::move(buffer);
917
130801
          url->port = NormalizePort(url->scheme, url->port);
918
130801
          if (new_is_special) {
919
124982
            url->flags |= URL_FLAGS_SPECIAL;
920
124982
            special = true;
921
          } else {
922
5819
            url->flags &= ~URL_FLAGS_SPECIAL;
923
5819
            special = false;
924
          }
925
          // `special_back_slash` equals to `(special && ch == '\\')` and `ch`
926
          // here always not equals to `\\`. So `special_back_slash` here always
927
          // equals to `false`.
928
130801
          special_back_slash = false;
929
130801
          buffer.clear();
930
130801
          if (has_state_override)
931
33
            return;
932
130768
          if (url->scheme == "file:") {
933
121057
            state = kFile;
934
3908
          } else if (special &&
935

13619
                     has_base &&
936
1039
                     url->scheme == base->scheme) {
937
329
            state = kSpecialRelativeOrAuthority;
938
9382
          } else if (special) {
939
3579
            state = kSpecialAuthoritySlashes;
940

5803
          } else if (p + 1 < end && p[1] == '/') {
941
716
            state = kPathOrAuthority;
942
716
            p++;
943
          } else {
944
5087
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
945
5087
            url->flags |= URL_FLAGS_HAS_PATH;
946
5087
            url->path.emplace_back("");
947
5087
            state = kCannotBeBase;
948
130768
          }
949
2014
        } else if (!has_state_override) {
950
2006
          buffer.clear();
951
2006
          state = kNoScheme;
952
2006
          p = input;
953
2006
          continue;
954
        } else {
955
8
          url->flags |= URL_FLAGS_FAILED;
956
8
          return;
957
        }
958
548169
        break;
959
14992
      case kNoScheme:
960

14992
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
961

14992
        if (!has_base || (cannot_be_base && ch != '#')) {
962
7826
          url->flags |= URL_FLAGS_FAILED;
963
7826
          return;
964

7166
        } else if (cannot_be_base && ch == '#') {
965
28
          url->scheme = base->scheme;
966
28
          if (IsSpecial(url->scheme)) {
967
            url->flags |= URL_FLAGS_SPECIAL;
968
            special = true;
969
          } else {
970
28
            url->flags &= ~URL_FLAGS_SPECIAL;
971
28
            special = false;
972
          }
973

28
          special_back_slash = (special && ch == '\\');
974
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
975
28
            url->flags |= URL_FLAGS_HAS_PATH;
976
28
            url->path = base->path;
977
          }
978
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
979
4
            url->flags |= URL_FLAGS_HAS_QUERY;
980
4
            url->query = base->query;
981
          }
982
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
983
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
984
            url->fragment = base->fragment;
985
          }
986
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
987
28
          state = kFragment;
988

14276
        } else if (has_base &&
989
7138
                   base->scheme != "file:") {
990
413
          state = kRelative;
991
413
          continue;
992
        } else {
993
6725
          url->scheme = "file:";
994
6725
          url->flags |= URL_FLAGS_SPECIAL;
995
6725
          special = true;
996
6725
          state = kFile;
997

6725
          special_back_slash = (special && ch == '\\');
998
6725
          continue;
999
        }
1000
28
        break;
1001
329
      case kSpecialRelativeOrAuthority:
1002

329
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1003
297
          state = kSpecialAuthorityIgnoreSlashes;
1004
297
          p++;
1005
        } else {
1006
32
          state = kRelative;
1007
32
          continue;
1008
        }
1009
297
        break;
1010
716
      case kPathOrAuthority:
1011
716
        if (ch == '/') {
1012
548
          state = kAuthority;
1013
        } else {
1014
168
          state = kPath;
1015
168
          continue;
1016
        }
1017
548
        break;
1018
445
      case kRelative:
1019
445
        url->scheme = base->scheme;
1020
445
        if (IsSpecial(url->scheme)) {
1021
345
          url->flags |= URL_FLAGS_SPECIAL;
1022
345
          special = true;
1023
        } else {
1024
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1025
100
          special = false;
1026
        }
1027

445
        special_back_slash = (special && ch == '\\');
1028

445
        switch (ch) {
1029
18
          case kEOL:
1030
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1031
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1032
4
              url->username = base->username;
1033
            }
1034
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1035
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1036
4
              url->password = base->password;
1037
            }
1038
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1039
16
              url->flags |= URL_FLAGS_HAS_HOST;
1040
16
              url->host = base->host;
1041
            }
1042
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1043
              url->flags |= URL_FLAGS_HAS_QUERY;
1044
              url->query = base->query;
1045
            }
1046
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1047
18
              url->flags |= URL_FLAGS_HAS_PATH;
1048
18
              url->path = base->path;
1049
            }
1050
18
            url->port = base->port;
1051
18
            break;
1052
154
          case '/':
1053
154
            state = kRelativeSlash;
1054
154
            break;
1055
38
          case '?':
1056
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1057
              url->flags |= URL_FLAGS_HAS_USERNAME;
1058
              url->username = base->username;
1059
            }
1060
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1061
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1062
              url->password = base->password;
1063
            }
1064
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1065
34
              url->flags |= URL_FLAGS_HAS_HOST;
1066
34
              url->host = base->host;
1067
            }
1068
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1069
38
              url->flags |= URL_FLAGS_HAS_PATH;
1070
38
              url->path = base->path;
1071
            }
1072
38
            url->port = base->port;
1073
38
            state = kQuery;
1074
38
            break;
1075
38
          case '#':
1076
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1077
              url->flags |= URL_FLAGS_HAS_USERNAME;
1078
              url->username = base->username;
1079
            }
1080
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1081
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1082
              url->password = base->password;
1083
            }
1084
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1085
34
              url->flags |= URL_FLAGS_HAS_HOST;
1086
34
              url->host = base->host;
1087
            }
1088
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1089
              url->flags |= URL_FLAGS_HAS_QUERY;
1090
              url->query = base->query;
1091
            }
1092
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1093
38
              url->flags |= URL_FLAGS_HAS_PATH;
1094
38
              url->path = base->path;
1095
            }
1096
38
            url->port = base->port;
1097
38
            state = kFragment;
1098
38
            break;
1099
197
          default:
1100
197
            if (special_back_slash) {
1101
18
              state = kRelativeSlash;
1102
            } else {
1103
179
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1104
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1105
1
                url->username = base->username;
1106
              }
1107
179
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1108
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1109
1
                url->password = base->password;
1110
              }
1111
179
              if (base->flags & URL_FLAGS_HAS_HOST) {
1112
159
                url->flags |= URL_FLAGS_HAS_HOST;
1113
159
                url->host = base->host;
1114
              }
1115
179
              if (base->flags & URL_FLAGS_HAS_PATH) {
1116
179
                url->flags |= URL_FLAGS_HAS_PATH;
1117
179
                url->path = base->path;
1118
179
                ShortenUrlPath(url);
1119
              }
1120
179
              url->port = base->port;
1121
179
              state = kPath;
1122
179
              continue;
1123
            }
1124
        }
1125
266
        break;
1126
172
      case kRelativeSlash:
1127


172
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1128
22
          state = kSpecialAuthorityIgnoreSlashes;
1129
150
        } else if (ch == '/') {
1130
6
          state = kAuthority;
1131
        } else {
1132
144
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1133
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1134
8
            url->username = base->username;
1135
          }
1136
144
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1137
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1138
4
            url->password = base->password;
1139
          }
1140
144
          if (base->flags & URL_FLAGS_HAS_HOST) {
1141
136
            url->flags |= URL_FLAGS_HAS_HOST;
1142
136
            url->host = base->host;
1143
          }
1144
144
          url->port = base->port;
1145
144
          state = kPath;
1146
144
          continue;
1147
        }
1148
28
        break;
1149
3579
      case kSpecialAuthoritySlashes:
1150
3579
        state = kSpecialAuthorityIgnoreSlashes;
1151

3579
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1152
3430
          p++;
1153
        } else {
1154
149
          continue;
1155
        }
1156
3430
        break;
1157
3975
      case kSpecialAuthorityIgnoreSlashes:
1158

3975
        if (ch != '/' && ch != '\\') {
1159
3898
          state = kAuthority;
1160
3898
          continue;
1161
        }
1162
77
        break;
1163
93977
      case kAuthority:
1164
93977
        if (ch == '@') {
1165
565
          if (atflag) {
1166
41
            buffer.reserve(buffer.size() + 3);
1167
41
            buffer.insert(0, "%40");
1168
          }
1169
565
          atflag = true;
1170
565
          size_t blen = buffer.size();
1171

565
          if (blen > 0 && buffer[0] != ':') {
1172
469
            url->flags |= URL_FLAGS_HAS_USERNAME;
1173
          }
1174
6652
          for (size_t n = 0; n < blen; n++) {
1175
6087
            const char bch = buffer[n];
1176
6087
            if (bch == ':') {
1177
444
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1178
444
              if (!password_token_seen_flag) {
1179
428
                password_token_seen_flag = true;
1180
428
                continue;
1181
              }
1182
            }
1183
5659
            if (password_token_seen_flag) {
1184
2722
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1185
            } else {
1186
2937
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1187
            }
1188
          }
1189
565
          buffer.clear();
1190

93412
        } else if (ch == kEOL ||
1191
89030
                   ch == '/' ||
1192
88998
                   ch == '?' ||
1193
88980
                   ch == '#' ||
1194
                   special_back_slash) {
1195

4452
          if (atflag && buffer.size() == 0) {
1196
52
            url->flags |= URL_FLAGS_FAILED;
1197
52
            return;
1198
          }
1199
4400
          p -= buffer.size() + 1;
1200
4400
          buffer.clear();
1201
4400
          state = kHost;
1202
        } else {
1203
88960
          buffer += ch;
1204
        }
1205
93925
        break;
1206
85362
      case kHost:
1207
      case kHostname:
1208

85362
        if (has_state_override && url->scheme == "file:") {
1209
12
          state = kFileHost;
1210
12
          continue;
1211

85350
        } else if (ch == ':' && !square_bracket_flag) {
1212
1600
          if (buffer.size() == 0) {
1213
24
            url->flags |= URL_FLAGS_FAILED;
1214
24
            return;
1215
          }
1216
1576
          if (state_override == kHostname) {
1217
4
            return;
1218
          }
1219
1572
          url->flags |= URL_FLAGS_HAS_HOST;
1220
1572
          if (!ParseHost(buffer, &url->host, special)) {
1221
5
            url->flags |= URL_FLAGS_FAILED;
1222
5
            return;
1223
          }
1224
1567
          buffer.clear();
1225
1567
          state = kPort;
1226

83750
        } else if (ch == kEOL ||
1227
80660
                   ch == '/' ||
1228
80620
                   ch == '?' ||
1229
80594
                   ch == '#' ||
1230
                   special_back_slash) {
1231
3180
          p--;
1232

3180
          if (special && buffer.size() == 0) {
1233
21
            url->flags |= URL_FLAGS_FAILED;
1234
21
            return;
1235
          }
1236
331
          if (has_state_override &&
1237

3528
              buffer.size() == 0 &&
1238
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1239
38
               url->port != -1)) {
1240
8
            url->flags |= URL_FLAGS_TERMINATED;
1241
8
            return;
1242
          }
1243
3151
          url->flags |= URL_FLAGS_HAS_HOST;
1244
3151
          if (!ParseHost(buffer, &url->host, special)) {
1245
432
            url->flags |= URL_FLAGS_FAILED;
1246
432
            return;
1247
          }
1248
2719
          buffer.clear();
1249
2719
          state = kPathStart;
1250
2719
          if (has_state_override) {
1251
227
            return;
1252
          }
1253
        } else {
1254
80570
          if (ch == '[')
1255
359
            square_bracket_flag = true;
1256
80570
          if (ch == ']')
1257
355
            square_bracket_flag = false;
1258
80570
          buffer += ch;
1259
        }
1260
84629
        break;
1261
9049
      case kPort:
1262
9049
        if (IsASCIIDigit(ch)) {
1263
7413
          buffer += ch;
1264

1636
        } else if (has_state_override ||
1265
1118
                   ch == kEOL ||
1266
36
                   ch == '/' ||
1267
36
                   ch == '?' ||
1268
36
                   ch == '#' ||
1269
                   special_back_slash) {
1270
1600
          if (buffer.size() > 0) {
1271
1586
            unsigned port = 0;
1272
            // the condition port <= 0xffff prevents integer overflow
1273

8783
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1274
7197
              port = port * 10 + buffer[i] - '0';
1275
1586
            if (port > 0xffff) {
1276
              // TODO(TimothyGu): This hack is currently needed for the host
1277
              // setter since it needs access to hostname if it is valid, and
1278
              // if the FAILED flag is set the entire response to JS layer
1279
              // will be empty.
1280
26
              if (state_override == kHost)
1281
2
                url->port = -1;
1282
              else
1283
24
                url->flags |= URL_FLAGS_FAILED;
1284
26
              return;
1285
            }
1286
            // the port is valid
1287
1560
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1288
1560
            if (url->port == -1)
1289
47
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1290
1560
            buffer.clear();
1291
14
          } else if (has_state_override) {
1292
            // TODO(TimothyGu): Similar case as above.
1293
6
            if (state_override == kHost)
1294
2
              url->port = -1;
1295
            else
1296
4
              url->flags |= URL_FLAGS_TERMINATED;
1297
6
            return;
1298
          }
1299
1568
          state = kPathStart;
1300
1568
          continue;
1301
        } else {
1302
36
          url->flags |= URL_FLAGS_FAILED;
1303
36
          return;
1304
        }
1305
7413
        break;
1306
127782
      case kFile:
1307
127782
        url->scheme = "file:";
1308
127782
        url->host.clear();
1309
127782
        url->flags |= URL_FLAGS_HAS_HOST;
1310

127782
        if (ch == '/' || ch == '\\') {
1311
121180
          state = kFileSlash;
1312

6602
        } else if (has_base && base->scheme == "file:") {
1313

6583
          switch (ch) {
1314
4
            case kEOL:
1315
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1316
4
                url->host = base->host;
1317
              }
1318
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1319
4
                url->flags |= URL_FLAGS_HAS_PATH;
1320
4
                url->path = base->path;
1321
              }
1322
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1323
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1324
4
                url->query = base->query;
1325
              }
1326
4
              break;
1327
4
            case '?':
1328
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1329
4
                url->host = base->host;
1330
              }
1331
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1332
4
                url->flags |= URL_FLAGS_HAS_PATH;
1333
4
                url->path = base->path;
1334
              }
1335
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1336
4
              url->query.clear();
1337
4
              state = kQuery;
1338
4
              break;
1339
4
            case '#':
1340
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1341
4
                url->host = base->host;
1342
              }
1343
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1344
4
                url->flags |= URL_FLAGS_HAS_PATH;
1345
4
                url->path = base->path;
1346
              }
1347
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1348
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1349
4
                url->query = base->query;
1350
              }
1351
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1352
4
              url->fragment.clear();
1353
4
              state = kFragment;
1354
4
              break;
1355
6571
            default:
1356
6571
              url->query.clear();
1357
6571
              if (base->flags & URL_FLAGS_HAS_HOST) {
1358
6571
                url->host = base->host;
1359
              }
1360
6571
              if (base->flags & URL_FLAGS_HAS_PATH) {
1361
6571
                url->flags |= URL_FLAGS_HAS_PATH;
1362
6571
                url->path = base->path;
1363
              }
1364
6571
              if (!StartsWithWindowsDriveLetter(p, end)) {
1365
6547
                ShortenUrlPath(url);
1366
              } else {
1367
24
                url->path.clear();
1368
              }
1369
6571
              state = kPath;
1370
6571
              continue;
1371
          }
1372
        } else {
1373
19
          state = kPath;
1374
19
          continue;
1375
        }
1376
121192
        break;
1377
121180
      case kFileSlash:
1378

121180
        if (ch == '/' || ch == '\\') {
1379
121013
          state = kFileHost;
1380
        } else {
1381

167
          if (has_base && base->scheme == "file:") {
1382
133
            url->flags |= URL_FLAGS_HAS_HOST;
1383
133
            url->host = base->host;
1384

252
            if (!StartsWithWindowsDriveLetter(p, end) &&
1385
119
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1386
4
              url->flags |= URL_FLAGS_HAS_PATH;
1387
4
              url->path.push_back(base->path[0]);
1388
            }
1389
          }
1390
167
          state = kPath;
1391
167
          continue;
1392
        }
1393
121013
        break;
1394
122130
      case kFileHost:
1395

122130
        if (ch == kEOL ||
1396
1115
            ch == '/' ||
1397
1105
            ch == '\\' ||
1398
1105
            ch == '?' ||
1399
            ch == '#') {
1400
121013
          if (!has_state_override &&
1401

242038
              buffer.size() == 2 &&
1402
22
              IsWindowsDriveLetter(buffer)) {
1403
12
            state = kPath;
1404
121013
          } else if (buffer.size() == 0) {
1405
120820
            url->flags |= URL_FLAGS_HAS_HOST;
1406
120820
            url->host.clear();
1407
120820
            if (has_state_override)
1408
4
              return;
1409
120816
            state = kPathStart;
1410
          } else {
1411
193
            std::string host;
1412
193
            if (!ParseHost(buffer, &host, special)) {
1413
52
              url->flags |= URL_FLAGS_FAILED;
1414
52
              return;
1415
            }
1416
141
            if (host == "localhost")
1417
37
              host.clear();
1418
141
            url->flags |= URL_FLAGS_HAS_HOST;
1419
141
            url->host = host;
1420
141
            if (has_state_override)
1421
4
              return;
1422
137
            buffer.clear();
1423
137
            state = kPathStart;
1424
          }
1425
120965
          continue;
1426
        } else {
1427
1105
          buffer += ch;
1428
        }
1429
1105
        break;
1430
200101
      case kPathStart:
1431
200101
        if (IsSpecial(url->scheme)) {
1432
199553
          state = kPath;
1433

199553
          if (ch != '/' && ch != '\\') {
1434
75822
            continue;
1435
          }
1436

548
        } else if (!has_state_override && ch == '?') {
1437
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1438
6
          url->query.clear();
1439
6
          state = kQuery;
1440

542
        } else if (!has_state_override && ch == '#') {
1441
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1442
6
          url->fragment.clear();
1443
6
          state = kFragment;
1444
536
        } else if (ch != kEOL) {
1445
459
          state = kPath;
1446
459
          if (ch != '/') {
1447
35
            continue;
1448
          }
1449

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1450
2
          url->flags |= URL_FLAGS_HAS_PATH;
1451
2
          url->path.emplace_back("");
1452
        }
1453
124244
        break;
1454
12766586
      case kPath:
1455

12766586
        if (ch == kEOL ||
1456
11538149
            ch == '/' ||
1457
11538079
            special_back_slash ||
1458

11538079
            (!has_state_override && (ch == '?' || ch == '#'))) {
1459
1229526
          if (IsDoubleDotSegment(buffer)) {
1460
4447
            ShortenUrlPath(url);
1461

4447
            if (ch != '/' && !special_back_slash) {
1462
280
              url->flags |= URL_FLAGS_HAS_PATH;
1463
280
              url->path.emplace_back("");
1464
            }
1465
1228200
          } else if (IsSingleDotSegment(buffer) &&
1466

1228200
                     ch != '/' && !special_back_slash) {
1467
718
            url->flags |= URL_FLAGS_HAS_PATH;
1468
718
            url->path.emplace_back("");
1469
1224361
          } else if (!IsSingleDotSegment(buffer)) {
1470
2438011
            if (url->scheme == "file:" &&
1471
1378547
                url->path.empty() &&
1472

2600505
                buffer.size() == 2 &&
1473
100
                IsWindowsDriveLetter(buffer)) {
1474
98
              buffer[1] = ':';
1475
            }
1476
1221958
            url->flags |= URL_FLAGS_HAS_PATH;
1477
1221958
            url->path.emplace_back(std::move(buffer));
1478
          }
1479
1229526
          buffer.clear();
1480
2459052
          if (ch == '?') {
1481
964
            url->flags |= URL_FLAGS_HAS_QUERY;
1482
964
            url->query.clear();
1483
964
            state = kQuery;
1484
1228562
          } else if (ch == '#') {
1485
55
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1486
55
            url->fragment.clear();
1487
55
            state = kFragment;
1488
          }
1489
        } else {
1490
11537060
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1491
        }
1492
12766586
        break;
1493
49583
      case kCannotBeBase:
1494
49583
        switch (ch) {
1495
4
          case '?':
1496
4
            state = kQuery;
1497
4
            break;
1498
10
          case '#':
1499
10
            state = kFragment;
1500
10
            break;
1501
49569
          default:
1502
49569
            if (url->path.empty())
1503
              url->path.emplace_back("");
1504
49569
            else if (ch != kEOL)
1505
44496
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1506
        }
1507
49583
        break;
1508
61614
      case kQuery:
1509

61614
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1510
1175
          url->flags |= URL_FLAGS_HAS_QUERY;
1511
1175
          url->query = std::move(buffer);
1512
1175
          buffer.clear();
1513
1564
          if (ch == '#')
1514
389
            state = kFragment;
1515
        } else {
1516
60439
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1517
                                                QUERY_ENCODE_SET_NONSPECIAL);
1518
        }
1519
61614
        break;
1520
4255
      case kFragment:
1521
4255
        switch (ch) {
1522
604
          case kEOL:
1523
604
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1524
604
            url->fragment = std::move(buffer);
1525
604
            break;
1526
3651
          default:
1527
3651
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1528
        }
1529
4255
        break;
1530
      default:
1531
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1532
        return;
1533
    }
1534
1535
14121249
    p++;
1536
  }
1537
}  // NOLINT(readability/fn_size)
1538
1539
// https://url.spec.whatwg.org/#url-serializing
1540
34135
std::string URL::SerializeURL(const url_data& url,
1541
                              bool exclude = false) {
1542
34135
  std::string output;
1543
34135
  output.reserve(
1544
    10 +  // We generally insert < 10 separator characters between URL parts
1545
34135
    url.scheme.size() +
1546
34135
    url.username.size() +
1547
34135
    url.password.size() +
1548
34135
    url.host.size() +
1549
34135
    url.query.size() +
1550
34135
    url.fragment.size() +
1551
34135
    url.href.size() +
1552
34135
    std::accumulate(
1553
        url.path.begin(),
1554
        url.path.end(),
1555
        0,
1556
360493
        [](size_t sum, const auto& str) { return sum + str.size(); }));
1557
1558
34135
  output += url.scheme;
1559
34135
  if (url.flags & URL_FLAGS_HAS_HOST) {
1560
34135
    output += "//";
1561
34135
    if (url.flags & URL_FLAGS_HAS_USERNAME ||
1562
34135
        url.flags & URL_FLAGS_HAS_PASSWORD) {
1563
      if (url.flags & URL_FLAGS_HAS_USERNAME) {
1564
        output += url.username;
1565
      }
1566
      if (url.flags & URL_FLAGS_HAS_PASSWORD) {
1567
        output += ":" + url.password;
1568
      }
1569
      output += "@";
1570
    }
1571
34135
    output += url.host;
1572
34135
    if (url.port != -1) {
1573
      output += ":" + std::to_string(url.port);
1574
    }
1575
  }
1576
34135
  if (url.flags & URL_FLAGS_CANNOT_BE_BASE) {
1577
    output += url.path[0];
1578
  } else {
1579
    if (!(url.flags & URL_FLAGS_HAS_HOST) &&
1580

34135
          url.path.size() > 1 &&
1581
          url.path[0].empty()) {
1582
      output += "/.";
1583
    }
1584
360493
    for (size_t i = 1; i < url.path.size(); i++) {
1585
326358
      output += "/" + url.path[i];
1586
    }
1587
  }
1588
34135
  if (url.flags & URL_FLAGS_HAS_QUERY) {
1589
    output += "?" + url.query;
1590
  }
1591

34135
  if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) {
1592
    output += "#" + url.fragment;
1593
  }
1594
34135
  output.shrink_to_fit();
1595
34135
  return output;
1596
}
1597
1598
namespace {
1599
144796
void SetArgs(Environment* env,
1600
             Local<Value> argv[ARG_COUNT],
1601
             const struct url_data& url) {
1602
144796
  Isolate* isolate = env->isolate();
1603
144796
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1604
289592
  argv[ARG_PROTOCOL] =
1605
144796
      url.flags & URL_FLAGS_SPECIAL ?
1606
138752
          GetSpecial(env, url.scheme) :
1607
6044
          OneByteString(isolate, url.scheme.c_str());
1608
144796
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1609
1224
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1610
144796
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1611
1184
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1612
144796
  if (url.flags & URL_FLAGS_HAS_HOST)
1613
278896
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1614
144796
  if (url.flags & URL_FLAGS_HAS_QUERY)
1615
2366
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1616
144796
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1617
1200
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1618
144796
  if (url.port > -1)
1619
3400
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1620
144796
  if (url.flags & URL_FLAGS_HAS_PATH)
1621
288400
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1622
144796
}
1623
1624
153326
void Parse(Environment* env,
1625
           Local<Value> recv,
1626
           const char* input,
1627
           size_t len,
1628
           enum url_parse_state state_override,
1629
           Local<Value> base_obj,
1630
           Local<Value> context_obj,
1631
           Local<Function> cb,
1632
           Local<Value> error_cb) {
1633
153326
  Isolate* isolate = env->isolate();
1634
153326
  Local<Context> context = env->context();
1635
153326
  HandleScope handle_scope(isolate);
1636
153326
  Context::Scope context_scope(context);
1637
1638
153326
  const bool has_context = context_obj->IsObject();
1639
153326
  const bool has_base = base_obj->IsObject();
1640
1641
153326
  url_data base;
1642
153326
  url_data url;
1643
153326
  if (has_context)
1644
41730
    url = HarvestContext(env, context_obj.As<Object>());
1645
153326
  if (has_base)
1646
8846
    base = HarvestBase(env, base_obj.As<Object>());
1647
1648
153326
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1649

153326
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1650
41730
      ((state_override != kUnknownState) &&
1651
41730
       (url.flags & URL_FLAGS_TERMINATED)))
1652
44
    return;
1653
1654
  // Define the return value placeholders
1655
153282
  const Local<Value> undef = Undefined(isolate);
1656
153282
  const Local<Value> null = Null(isolate);
1657
153282
  if (!(url.flags & URL_FLAGS_FAILED)) {
1658
    Local<Value> argv[] = {
1659
      undef,
1660
      undef,
1661
      undef,
1662
      undef,
1663
      null,  // host defaults to null
1664
      null,  // port defaults to null
1665
      undef,
1666
      null,  // query defaults to null
1667
      null,  // fragment defaults to null
1668
144796
    };
1669
144796
    SetArgs(env, argv, url);
1670
289592
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
1671
8486
  } else if (error_cb->IsFunction()) {
1672
16712
    Local<Value> flags = Integer::NewFromUnsigned(isolate, url.flags);
1673
8356
    USE(error_cb.As<Function>()->Call(context, recv, 1, &flags));
1674
  }
1675
}
1676
1677
153326
void Parse(const FunctionCallbackInfo<Value>& args) {
1678
153326
  Environment* env = Environment::GetCurrent(args);
1679
153326
  CHECK_GE(args.Length(), 5);
1680
306652
  CHECK(args[0]->IsString());  // input
1681


416650
  CHECK(args[2]->IsUndefined() ||  // base context
1682
        args[2]->IsNull() ||
1683
        args[2]->IsObject());
1684


431842
  CHECK(args[3]->IsUndefined() ||  // context
1685
        args[3]->IsNull() ||
1686
        args[3]->IsObject());
1687
153326
  CHECK(args[4]->IsFunction());  // complete callback
1688

418248
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1689
1690
153326
  Utf8Value input(env->isolate(), args[0]);
1691
153326
  enum url_parse_state state_override = kUnknownState;
1692
153326
  if (args[1]->IsNumber()) {
1693
153326
    state_override = static_cast<enum url_parse_state>(
1694
306652
        args[1]->Uint32Value(env->context()).FromJust());
1695
  }
1696
1697
306652
  Parse(env, args.This(),
1698
153326
        *input, input.length(),
1699
        state_override,
1700
        args[2],
1701
        args[3],
1702
306652
        args[4].As<Function>(),
1703
        args[5]);
1704
153326
}
1705
1706
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1707
92
  Environment* env = Environment::GetCurrent(args);
1708
92
  CHECK_GE(args.Length(), 1);
1709
184
  CHECK(args[0]->IsString());
1710
184
  Utf8Value value(env->isolate(), args[0]);
1711
92
  std::string output;
1712
92
  size_t len = value.length();
1713
92
  output.reserve(len);
1714
756
  for (size_t n = 0; n < len; n++) {
1715
664
    const char ch = (*value)[n];
1716
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1717
  }
1718
276
  args.GetReturnValue().Set(
1719
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1720
92
}
1721
1722
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1723
229
  Environment* env = Environment::GetCurrent(args);
1724
229
  CHECK_GE(args.Length(), 1);
1725
458
  CHECK(args[0]->IsString());
1726
229
  Utf8Value value(env->isolate(), args[0]);
1727
1728
229
  URLHost host;
1729
  // Assuming the host is used for a special scheme.
1730
229
  host.ParseHost(*value, value.length(), true);
1731
229
  if (host.ParsingFailed()) {
1732
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1733
12
    return;
1734
  }
1735
217
  std::string out = host.ToStringMove();
1736
651
  args.GetReturnValue().Set(
1737
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1738
}
1739
1740
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1741
207
  Environment* env = Environment::GetCurrent(args);
1742
207
  CHECK_GE(args.Length(), 1);
1743
414
  CHECK(args[0]->IsString());
1744
207
  Utf8Value value(env->isolate(), args[0]);
1745
1746
207
  URLHost host;
1747
  // Assuming the host is used for a special scheme.
1748
207
  host.ParseHost(*value, value.length(), true, true);
1749
207
  if (host.ParsingFailed()) {
1750
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1751
12
    return;
1752
  }
1753
195
  std::string out = host.ToStringMove();
1754
585
  args.GetReturnValue().Set(
1755
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1756
}
1757
1758
627
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1759
627
  Environment* env = Environment::GetCurrent(args);
1760
627
  CHECK_EQ(args.Length(), 1);
1761
627
  CHECK(args[0]->IsFunction());
1762
1254
  env->set_url_constructor_function(args[0].As<Function>());
1763
627
}
1764
1765
627
void Initialize(Local<Object> target,
1766
                Local<Value> unused,
1767
                Local<Context> context,
1768
                void* priv) {
1769
627
  Environment* env = Environment::GetCurrent(context);
1770
627
  env->SetMethod(target, "parse", Parse);
1771
627
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1772
627
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1773
627
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1774
627
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1775
1776
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1777
16929
  FLAGS(XX)
1778
#undef XX
1779
1780
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1781
26334
  PARSESTATES(XX)
1782
#undef XX
1783
627
}
1784
}  // namespace
1785
1786
4959
// Registers every native callback exposed by this binding with `registry`,
// in the same order as before.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  // Initializing a v8::FunctionCallback from `Parse` selects the
  // FunctionCallbackInfo overload.
  const v8::FunctionCallback callbacks[] = {
    Parse,
    EncodeAuthSet,
    DomainToASCII,
    DomainToUnicode,
    SetURLConstructor,
  };
  for (const v8::FunctionCallback callback : callbacks) {
    registry->Register(callback);
  }
}
1793
1794
8
std::string URL::ToFilePath() const {
1795
8
  if (context_.scheme != "file:") {
1796
1
    return "";
1797
  }
1798
1799
#ifdef _WIN32
1800
  const char* slash = "\\";
1801
  auto is_slash = [] (char ch) {
1802
    return ch == '/' || ch == '\\';
1803
  };
1804
#else
1805
7
  const char* slash = "/";
1806
46
  auto is_slash = [] (char ch) {
1807
46
    return ch == '/';
1808
  };
1809

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1810
7
      context_.host.length() > 0) {
1811
1
    return "";
1812
  }
1813
#endif
1814
12
  std::string decoded_path;
1815
18
  for (const std::string& part : context_.path) {
1816
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1817
58
    for (char& ch : decoded) {
1818
46
      if (is_slash(ch)) {
1819
1
        return "";
1820
      }
1821
    }
1822
12
    decoded_path += slash + decoded;
1823
  }
1824
1825
#ifdef _WIN32
1826
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1827
1828
  // If hostname is set, then we have a UNC path. Pass the hostname through
1829
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1830
  // need to worry about percent encoding because the URL parser will have
1831
  // already taken care of that for us. Note that this only causes IDNs with an
1832
  // appropriate `xn--` prefix to be decoded.
1833
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1834
      context_.host.length() > 0) {
1835
    std::string unicode_host;
1836
    if (!ToUnicode(context_.host, &unicode_host)) {
1837
      return "";
1838
    }
1839
    return "\\\\" + unicode_host + decoded_path;
1840
  }
1841
  // Otherwise, it's a local path that requires a drive letter.
1842
  if (decoded_path.length() < 3) {
1843
    return "";
1844
  }
1845
  if (decoded_path[2] != ':' ||
1846
      !IsASCIIAlpha(decoded_path[1])) {
1847
    return "";
1848
  }
1849
  // Strip out the leading '\'.
1850
  return decoded_path.substr(1);
1851
#else
1852
5
  return decoded_path;
1853
#endif
1854
}
1855
1856
34135
// Builds a "file://" URL from a file system path.
//
// Every '%' in `file_path` is written as "%25" before parsing. The escaped
// path is then parsed into the URL's context starting from the kPathStart
// state.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped;
  for (const char ch : file_path) {
    if (ch == '%') {
      escaped += "%25";
    } else {
      escaped += ch;
    }
  }

  URL::Parse(escaped.c_str(), escaped.length(), kPathStart,
             &url.context_, true, nullptr, false);
  return url;
}
1868
1869
// This function works by calling out to a JS function that creates and
1870
// returns the JS URL object. Be mindful of the JS<->Native boundary
1871
// crossing that is required.
1872
// Creates the JavaScript URL object for this URL by calling the registered
// URL constructor function with the parsed components as arguments.
//
// Returns an empty handle when this URL failed to parse. Exceptions raised
// by the constructor call are handled by a TryCatchScope in kFatal mode.
MaybeLocal<Value> URL::ToObject(Environment* env) const {
  Isolate* isolate = env->isolate();
  Local<Context> context = env->context();
  Context::Scope context_scope(context);

  const Local<Value> undef = Undefined(isolate);
  const Local<Value> null = Null(isolate);

  const bool parse_failed = (context_.flags & URL_FLAGS_FAILED) != 0;
  if (parse_failed) {
    return Local<Value>();
  }

  // Placeholders; SetArgs() overwrites the slots that have a value.
  Local<Value> argv[] = {
    undef,
    undef,
    undef,
    undef,
    null,  // host defaults to null
    null,  // port defaults to null
    undef,
    null,  // query defaults to null
    null,  // fragment defaults to null
  };
  SetArgs(env, argv, context_);

  MaybeLocal<Value> result;
  {
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);

    // The SetURLConstructor method must have been called already to
    // set the constructor function used below. SetURLConstructor is
    // called automatically when the internal/url.js module is loaded
    // during the internal/bootstrap/node.js processing.
    Local<Function> constructor = env->url_constructor_function();
    result = constructor->Call(env->context(), undef, arraysize(argv), argv);
  }

  return result;
}
1910
1911
}  // namespace url
1912
}  // namespace node
1913
1914
5020
// Declares the internal `url` module, with node::url::Initialize as the
// function that populates it.
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1915
4959
// Associates the `url` module with node::url::RegisterExternalReferences,
// which registers the module's native callbacks.
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)