GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1127 1185 95.1 %
Date: 2022-05-29 04:15:40 Branches: 970 1096 88.5 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <numeric>
11
#include <string>
12
#include <vector>
13
14
namespace node {
15
16
using errors::TryCatchScope;
17
18
using url::table_data::hex;
19
using url::table_data::C0_CONTROL_ENCODE_SET;
20
using url::table_data::FRAGMENT_ENCODE_SET;
21
using url::table_data::PATH_ENCODE_SET;
22
using url::table_data::USERINFO_ENCODE_SET;
23
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
24
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
25
26
using v8::Array;
27
using v8::Context;
28
using v8::Function;
29
using v8::FunctionCallbackInfo;
30
using v8::HandleScope;
31
using v8::Int32;
32
using v8::Integer;
33
using v8::Isolate;
34
using v8::Local;
35
using v8::MaybeLocal;
36
using v8::NewStringType;
37
using v8::Null;
38
using v8::Object;
39
using v8::String;
40
using v8::Undefined;
41
using v8::Value;
42
43
149057
// Builds a V8 string from the UTF-8 bytes of `str`.
// The result is unwrapped with ToLocalChecked(), so an empty MaybeLocal is
// not reported back to the caller.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
49
50
namespace url {
51
namespace {
52
53
// https://url.spec.whatwg.org/#eof-code-point
54
constexpr char kEOL = -1;
55
56
// https://url.spec.whatwg.org/#concept-host
57
// Holds the result of parsing a URL host: a domain, an opaque host, an IPv4
// address or an IPv6 address. Until one of the Parse* methods succeeds the
// host is in the H_FAILED state.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no host has been stored (the initial state, and the state
  // Reset() returns to).
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage for whichever representation `type_` says is active.
  // The constructor activates `ipv4`; the destructor is deliberately empty
  // because URLHost::Reset() is responsible for destroying the string.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Whether `value_.domain_or_opaque` is the live union member.
  bool HoldsString() const {
    return type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
  }

  // Destroys the string member if it is live and returns to H_FAILED.
  void Reset() {
    if (HoldsString())
      value_.domain_or_opaque.~basic_string();
    type_ = HostType::H_FAILED;
  }

  // Setting the string member of the union with = is brittle because it
  // relies on the member being in a state that requires no destruction of
  // old data. For a long time that worked well enough because
  // ParseIPv6Host() happened to zero-fill `value_`, but that relies on
  // standard library internals too much. Constructing the string in place
  // after a Reset() is the easiest solution, but we might want to consider
  // just not forcing strings into a union.
  void EmplaceString(HostType type, std::string&& string) {
    Reset();
    type_ = type;
    new(&value_.domain_or_opaque) std::string(std::move(string));
  }

  void SetOpaque(std::string&& string) {
    EmplaceString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    EmplaceString(HostType::H_DOMAIN, std::move(string));
  }
};
128
129
5750
// The union's own destructor is empty, so the string alternative (if it is
// the active one) has to be destroyed here through Reset().
URLHost::~URLHost() {
  Reset();
}
132
133
// X-macro naming the entries of `url_cb_args`, in order.
// NOTE(review): presumably these are the positions of the values handed to
// the JS parse callback -- confirm against the code that uses them.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)                                                            \
  XX(ARG_COUNT)  // This one has to be last.

// One enumerator per ARGS entry, numbered from 0; ARG_COUNT therefore equals
// the number of entries that precede it.
enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};
150
151
// Defines two overloads of the predicate `name`:
//   name(ch1, ch2)  evaluates `expr`, which may refer to `ch1` and `ch2`;
//   name(str)       applies it to the first two characters of `str` and is
//                   false for strings shorter than two characters.
// Both overloads statically require the character type to be at least
// `bits` / 8 bytes wide.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  bool name(const T ch1, const T ch2) {                                       \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  bool name(const std::basic_string<T>& str) {                                \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be wider than " #bits " bits");             \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }
164
165
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
166

14418481
// True for TAB, LF and CR.
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control-or-space
// True for every value from NUL up to and including the space character.
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
// True for '0' through '9'.
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
// True for decimal digits and for the letters A-F in either case.
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               (ch >= 'A' && ch <= 'F') ||
                               (ch >= 'a' && ch <= 'f')))

// https://infra.spec.whatwg.org/#ascii-alpha
// True for A-Z and a-z.
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
                            (ch >= 'a' && ch <= 'z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
// True for anything accepted by IsASCIIDigit() or IsASCIIAlpha().
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))
185
186
// https://infra.spec.whatwg.org/#ascii-lowercase
187
// Maps A-Z to a-z by setting bit 0x20; every other value (including
// characters that are already lowercase) is returned unchanged.
template <typename T>
T ASCIILowercase(T ch) {
  const bool is_upper = ch >= 'A' && ch <= 'Z';
  const bool is_lower = ch >= 'a' && ch <= 'z';
  if (is_upper || is_lower)
    return ch | 0x20;
  return ch;
}
191
192
// https://url.spec.whatwg.org/#forbidden-host-code-point
193









97442
// True for the characters that may not appear in a host: NUL, TAB, LF, CR,
// space and the punctuation # % / : ? @ [ < > \ ] ^ |
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
          ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
          ch == '^' || ch == '|')
199
200
// https://url.spec.whatwg.org/#windows-drive-letter
201

12744
// An ASCII letter followed by ':' or '|'.
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
// An ASCII letter followed by ':' only.
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && ch2 == ':'))
207
208
#undef TWO_CHAR_STRING_TEST
209
210
12137491
// Tests bit number `i` of the bitmap `a`. Bits are numbered from the least
// significant bit of a[0], eight bits per array element.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i / 8];
  const int mask = 1 << (i % 8);
  return (byte & mask) != 0;
}
213
214
// Appends ch to str. If ch position in encode_set is set, the ch will
215
// be percent-encoded then appended.
216
12137491
void AppendOrEscape(std::string* str,
217
                    const unsigned char ch,
218
                    const uint8_t encode_set[]) {
219
12137491
  if (BitAt(encode_set, ch))
220
1930
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
221
  else
222
12135561
    *str += ch;
223
12137491
}
224
225
850
unsigned hex2bin(const char ch) {
226

850
  if (ch >= '0' && ch <= '9')
227
546
    return ch - '0';
228

304
  if (ch >= 'A' && ch <= 'F')
229
172
    return 10 + (ch - 'A');
230

132
  if (ch >= 'a' && ch <= 'f')
231
132
    return 10 + (ch - 'a');
232
  UNREACHABLE();
233
}
234
235
4888
// Percent-decodes the `len` bytes at `input`.
// A '%' followed by two hexadecimal digits is replaced by the byte they
// encode; every other byte (including a '%' that is not followed by two hex
// digits) is copied through unchanged.
std::string PercentDecode(const char* input, size_t len) {
  std::string decoded;
  if (len == 0)
    return decoded;
  decoded.reserve(len);

  // Value of a hexadecimal digit, or -1 when `c` is not one.
  const auto nibble = [](const char c) -> int {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return 10 + (c - 'A');
    if (c >= 'a' && c <= 'f') return 10 + (c - 'a');
    return -1;
  };

  const char* const end = input + len;
  const char* cursor = input;
  while (cursor < end) {
    // An escape needs the '%' plus two more bytes inside the input.
    if (*cursor == '%' && end - cursor >= 3) {
      const int high = nibble(cursor[1]);
      const int low = nibble(cursor[2]);
      if (high >= 0 && low >= 0) {
        decoded += static_cast<char>(high * 16 + low);
        cursor += 3;
        continue;
      }
    }
    decoded += *cursor;
    cursor++;
  }
  return decoded;
}
263
264
// X-macro over the special schemes. Each entry is
//   XX(identifier, port number, scheme string including the trailing ':')
// where a port of -1 is used for "file:".
#define SPECIALS(XX)                                                          \
  XX(ftp, 21, "ftp:")                                                         \
  XX(file, -1, "file:")                                                       \
  XX(http, 80, "http:")                                                       \
  XX(https, 443, "https:")                                                    \
  XX(ws, 80, "ws:")                                                           \
  XX(wss, 443, "wss:")
271
272
346893
bool IsSpecial(const std::string& scheme) {
273
#define V(_, __, name) if (scheme == name) return true;
274



346893
  SPECIALS(V);
275
#undef V
276
6587
  return false;
277
}
278
279
145386
// Returns the per-Environment string `url_special_<key>_string()` for the
// SPECIALS entry whose scheme string equals `scheme`.
// A scheme that is not listed in SPECIALS ends in UNREACHABLE().
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define V(key, port, name)                                                    \
  if (scheme == name) {                                                       \
    return env->url_special_##key##_string();                                 \
  }
  SPECIALS(V)
#undef V
  UNREACHABLE();
}
286
287
138191
int NormalizePort(const std::string& scheme, int p) {
288
#define V(_, port, name) if (scheme == name && p == port) return -1;
289









138191
  SPECIALS(V);
290
#undef V
291
11849
  return p;
292
}
293
294
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
295
6831
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
296
6831
  size_t length = end - p;
297
6128
  return length >= 2 &&
298

12995
    IsWindowsDriveLetter(p[0], p[1]) &&
299
36
    (length == 2 ||
300
36
      p[2] == '/' ||
301
14
      p[2] == '\\' ||
302
6
      p[2] == '?' ||
303
6835
      p[2] == '#');
304
}
305
306
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs `input` through i18n::ToUnicode() and stores the result in `*output`.
// Returns false (leaving `*output` untouched) when the conversion reports a
// negative status.
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const auto status =
      i18n::ToUnicode(&converted, input.c_str(), input.length());
  if (status < 0)
    return false;
  output->assign(*converted, converted.length());
  return true;
}

// Runs `input` through i18n::ToASCII() and stores the result in `*output`.
// Returns false (leaving `*output` untouched) when the conversion reports a
// negative status or produces an empty result.
bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const auto status =
      i18n::ToASCII(&converted, input.c_str(), input.length());
  if (status < 0 || converted.length() == 0)
    return false;
  output->assign(*converted, converted.length());
  return true;
}
#else  // !defined(NODE_HAVE_I18N_SUPPORT)
// Intentional no-ops if ICU is not present: the input is copied to the
// output unchanged and the call always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
  *output = input;
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  *output = input;
  return true;
}
#endif  // !defined(NODE_HAVE_I18N_SUPPORT)
336
337
#define NS_IN6ADDRSZ 16
338
339
347
void URLHost::ParseIPv6Host(const char* input, size_t length) {
340
347
  CHECK_EQ(type_, HostType::H_FAILED);
341
342
  unsigned char buf[sizeof(struct in6_addr)];
343
347
  MaybeStackBuffer<char> ipv6(length + 1);
344
347
  *(*ipv6 + length) = 0;
345
347
  memset(buf, 0, sizeof(buf));
346
347
  memcpy(*ipv6, input, sizeof(const char) * length);
347
348
347
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
349
350
347
  if (ret != 0) {
351
92
    return;
352
  }
353
354
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
355
2295
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
356
2040
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
357
  }
358
359
255
  type_ = HostType::H_IPV6;
360
}
361
362
5536
// Parses the characters in [start, end) as one part of an IPv4 address.
//
// The radix is chosen from the prefix: "0x"/"0X" selects hexadecimal, a
// leading "0" followed by at least one more character selects octal, and
// anything else is decimal. An empty range (also after a "0x" prefix)
// yields 0.
//
// Returns the parsed value, or -1 if any character is not a digit of the
// selected radix. Values too large for int64_t saturate at the largest
// int64_t value.
//
// Only the bytes inside [start, end) are read; the range does not have to be
// followed by a NUL or any other terminator.
int64_t ParseNumber(const char* start, const char* end) {
  int64_t radix = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    radix = 16;
  }
  if (end - start == 0) {
    return 0;
  } else if (radix == 10 && end - start > 1 && start[0] == '0') {
    start++;
    radix = 8;
  }

  constexpr int64_t kMaxValue = static_cast<int64_t>(~uint64_t{0} >> 1);
  int64_t value = 0;
  bool saturated = false;

  for (const char* p = start; p < end; p++) {
    const char ch = *p;
    int64_t digit;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
      digit = 10 + (ch - 'a');
    else if (ch >= 'A' && ch <= 'F')
      digit = 10 + (ch - 'A');
    else
      return -1;
    if (digit >= radix)
      return -1;

    // Keep validating the remaining characters after saturation so that an
    // invalid digit is still reported as -1.
    if (saturated)
      continue;
    if (value > (kMaxValue - digit) / radix) {
      value = kMaxValue;
      saturated = true;
    } else {
      value = value * radix + digit;
    }
  }
  return value;
}
396
397
4552
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
398
4552
  CHECK_EQ(type_, HostType::H_FAILED);
399
4552
  *is_ipv4 = false;
400
4552
  const char* pointer = input;
401
4552
  const char* mark = input;
402
4552
  const char* end = pointer + length;
403
4552
  int parts = 0;
404
4552
  uint32_t val = 0;
405
  uint64_t numbers[4];
406
4552
  int tooBigNumbers = 0;
407
4552
  if (length == 0)
408
4210
    return;
409
410
42589
  while (pointer <= end) {
411
42223
    const char ch = pointer < end ? pointer[0] : kEOL;
412
42223
    int64_t remaining = end - pointer - 1;
413

42223
    if (ch == '.' || ch == kEOL) {
414
5552
      if (++parts > static_cast<int>(arraysize(numbers))) return;
415
5548
      if (pointer == mark)
416
12
        return;
417
5536
      int64_t n = ParseNumber(mark, pointer);
418
5536
      if (n < 0)
419
4166
        return;
420
421
1370
      if (n > 255) {
422
112
        tooBigNumbers++;
423
      }
424
1370
      numbers[parts - 1] = n;
425
1370
      mark = pointer + 1;
426

1370
      if (ch == '.' && remaining == 0)
427
4
        break;
428
    }
429
38037
    pointer++;
430
  }
431
370
  CHECK_GT(parts, 0);
432
370
  *is_ipv4 = true;
433
434
  // If any but the last item in numbers is greater than 255, return failure.
435
  // If the last item in numbers is greater than or equal to
436
  // 256^(5 - the number of items in numbers), return failure.
437
366
  if (tooBigNumbers > 1 ||
438

796
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
439
362
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
440
28
    return;
441
  }
442
443
342
  type_ = HostType::H_IPV4;
444
342
  val = static_cast<uint32_t>(numbers[parts - 1]);
445
1244
  for (int n = 0; n < parts - 1; n++) {
446
902
    double b = 3 - n;
447
902
    val +=
448
902
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
449
  }
450
451
342
  value_.ipv4 = val;
452
}
453
454
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
455
520
  CHECK_EQ(type_, HostType::H_FAILED);
456
520
  std::string output;
457
520
  output.reserve(length);
458
3053
  for (size_t i = 0; i < length; i++) {
459
2595
    const char ch = input[i];
460

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
461
62
      return;
462
    } else {
463
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
464
    }
465
  }
466
467
458
  SetOpaque(std::move(output));
468
}
469
470
5750
void URLHost::ParseHost(const char* input,
471
                        size_t length,
472
                        bool is_special,
473
                        bool unicode) {
474
5750
  CHECK_EQ(type_, HostType::H_FAILED);
475
5750
  const char* pointer = input;
476
477
5750
  if (length == 0)
478
1568
    return;
479
480
5750
  if (pointer[0] == '[') {
481
355
    if (pointer[length - 1] != ']')
482
8
      return;
483
347
    return ParseIPv6Host(++pointer, length - 2);
484
  }
485
486
5395
  if (!is_special)
487
520
    return ParseOpaqueHost(input, length);
488
489
  // First, we have to percent decode
490
4875
  std::string decoded = PercentDecode(input, length);
491
492
  // Then we have to punycode toASCII
493
4875
  if (!ToASCII(decoded, &decoded))
494
148
    return;
495
496
  // If any of the following characters are still present, we have to fail
497
99425
  for (size_t n = 0; n < decoded.size(); n++) {
498
94873
    const char ch = decoded[n];
499
94873
    if (IsForbiddenHostCodePoint(ch)) {
500
175
      return;
501
    }
502
  }
503
504
  // Check to see if it's an IPv4 IP address
505
  bool is_ipv4;
506
4552
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
507
4552
  if (is_ipv4)
508
370
    return;
509
510
  // If the unicode flag is set, run the result through punycode ToUnicode
511

4182
  if (unicode && !ToUnicode(decoded, &decoded))
512
    return;
513
514
  // It's not an IPv4 or IPv6 address, it must be a domain
515
4182
  SetDomain(std::move(decoded));
516
}
517
518
// Locates the longest sequence of 0 segments in an IPv6 address
519
// in order to use the :: compression when serializing
520
// Returns a pointer to the first element of the longest run of zeros in
// values[0 .. len). Only runs of at least two zeros count, and of several
// runs with the same length the earliest one wins. Returns nullptr when
// there is no such run.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best_start = nullptr;
  unsigned best_length = 1;  // A run must be longer than this to qualify.

  T* run_start = nullptr;
  unsigned run_length = 0;

  for (size_t i = 0; i < len; i++) {
    if (values[i] == 0) {
      if (run_length == 0)
        run_start = &values[i];
      run_length++;
      continue;
    }
    // A non-zero element closes the current run (if any).
    if (run_length > best_length) {
      best_length = run_length;
      best_start = run_start;
    }
    run_length = 0;
  }

  // The input may end in the middle of a run.
  if (run_length > best_length)
    best_start = run_start;
  return best_start;
}
548
549
5237
std::string URLHost::ToStringMove() {
550
5237
  std::string return_value;
551
5237
  switch (type_) {
552
4640
    case HostType::H_DOMAIN:
553
    case HostType::H_OPAQUE:
554
4640
      return_value = std::move(value_.domain_or_opaque);
555
4640
      break;
556
597
    default:
557
597
      return_value = ToString();
558
597
      break;
559
  }
560
5237
  Reset();
561
5237
  return return_value;
562
}
563
564
597
std::string URLHost::ToString() const {
565
1194
  std::string dest;
566

597
  switch (type_) {
567
    case HostType::H_DOMAIN:
568
    case HostType::H_OPAQUE:
569
      return value_.domain_or_opaque;
570
342
    case HostType::H_IPV4: {
571
342
      dest.reserve(15);
572
342
      uint32_t value = value_.ipv4;
573
1710
      for (int n = 0; n < 4; n++) {
574
1368
        dest.insert(0, std::to_string(value % 256));
575
1368
        if (n < 3)
576
1026
          dest.insert(0, 1, '.');
577
1368
        value /= 256;
578
      }
579
342
      break;
580
    }
581
255
    case HostType::H_IPV6: {
582
255
      dest.reserve(41);
583
255
      dest += '[';
584
255
      const uint16_t* start = &value_.ipv6[0];
585
      const uint16_t* compress_pointer =
586
255
          FindLongestZeroSequence(start, 8);
587
255
      bool ignore0 = false;
588
2295
      for (int n = 0; n <= 7; n++) {
589
2040
        const uint16_t* piece = &value_.ipv6[n];
590

2040
        if (ignore0 && *piece == 0)
591
1715
          continue;
592
576
        else if (ignore0)
593
245
          ignore0 = false;
594
576
        if (compress_pointer == piece) {
595
251
          dest += n == 0 ? "::" : ":";
596
251
          ignore0 = true;
597
251
          continue;
598
        }
599
        char buf[5];
600
325
        snprintf(buf, sizeof(buf), "%x", *piece);
601
325
        dest += buf;
602
325
        if (n < 7)
603
76
          dest += ':';
604
      }
605
255
      dest += ']';
606
255
      break;
607
    }
608
    case HostType::H_FAILED:
609
      break;
610
  }
611
597
  return dest;
612
}
613
614
5408
bool ParseHost(const std::string& input,
615
               std::string* output,
616
               bool is_special,
617
               bool unicode = false) {
618
5408
  if (input.empty()) {
619
94
    output->clear();
620
94
    return true;
621
  }
622
10628
  URLHost host;
623
5314
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
624
5314
  if (host.ParsingFailed())
625
489
    return false;
626
4825
  *output = host.ToStringMove();
627
4825
  return true;
628
}
629
630
9096
std::vector<std::string> FromJSStringArray(Environment* env,
631
                                           Local<Array> array) {
632
9096
  std::vector<std::string> vec;
633
9096
  if (array->Length() > 0)
634
9080
    vec.reserve(array->Length());
635
136506
  for (size_t n = 0; n < array->Length(); n++) {
636
118314
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
637
118314
    if (val->IsString()) {
638
59157
      Utf8Value value(env->isolate(), val.As<String>());
639
59157
      vec.emplace_back(*value, value.length());
640
    }
641
  }
642
9096
  return vec;
643
}
644
645
9096
// Builds a url_data from the properties of the JS object `base_obj`.
//
// Properties are read in this order: flags, port, scheme, username,
// password, host, query, fragment, path. `flags` and `port` are only copied
// when they are Int32 values; `scheme` is always converted to a string.
// For each string component the matching URL_FLAGS_HAS_* bit is set when the
// property is a string (username and password additionally have to be
// non-empty). `path` is copied, and URL_FLAGS_HAS_PATH set, when it is an
// array.
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();
  Isolate* isolate = env->isolate();

  // Reads one property of `base_obj`.
  const auto read = [&](Local<String> key) -> Local<Value> {
    return base_obj->Get(context, key).ToLocalChecked();
  };

  Local<Value> flags = read(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = read(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  Local<Value> scheme = read(env->scheme_string());
  base.scheme = Utf8Value(isolate, scheme).out();

  // Copies a string property into `*target` and records its presence.
  // With `empty_as_present` an empty string still sets `flag`.
  const auto read_string = [&](std::string* target,
                               int flag,
                               Local<String> key,
                               bool empty_as_present) {
    Local<Value> value = read(key);
    if (!value->IsString())
      return;
    Utf8Value utf8value(isolate, value.As<String>());
    target->assign(*utf8value, utf8value.length());
    if (empty_as_present || value.As<String>()->Length() != 0)
      base.flags |= flag;
  };
  read_string(&base.username,
              URL_FLAGS_HAS_USERNAME,
              env->username_string(),
              false);
  read_string(&base.password,
              URL_FLAGS_HAS_PASSWORD,
              env->password_string(),
              false);
  read_string(&base.host, URL_FLAGS_HAS_HOST, env->host_string(), true);
  read_string(&base.query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
  read_string(&base.fragment,
              URL_FLAGS_HAS_FRAGMENT,
              env->fragment_string(),
              true);

  Local<Value> path = read(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
699
700
44139
// Builds a url_data from the JS object `context_obj`.
//
// Only the flag bits in kCopyFlagsMask are taken over. `scheme`, `port` and
// `host` are copied when they have the expected JS type. `username` and
// `password` are read only when the corresponding HAS_* flag was copied, and
// are then required (CHECK) to be strings.
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;
  Isolate* isolate = env->isolate();

  // Reads one property of `context_obj`.
  const auto read = [&](Local<String> key) -> Local<Value> {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  // Stores the UTF-8 form of `value` in `*target`.
  const auto store_utf8 = [&](std::string* target, Local<Value> value) {
    Utf8Value utf8(isolate, value);
    target->assign(*utf8, utf8.length());
  };

  Local<Value> flags = read(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = read(env->scheme_string());
  if (scheme->IsString())
    store_utf8(&context.scheme, scheme);

  Local<Value> port = read(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = read(env->username_string());
    CHECK(username->IsString());
    store_utf8(&context.username, username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = read(env->password_string());
    CHECK(password->IsString());
    store_utf8(&context.password, password);
  }

  Local<Value> host = read(env->host_string());
  if (host->IsString())
    store_utf8(&context.host, host);

  return context;
}
748
749
// Single dot segment can be ".", "%2e", or "%2E"
750
2551579
// True when `str` is exactly ".", "%2e" or "%2E".
bool IsSingleDotSegment(const std::string& str) {
  if (str.size() == 1)
    return str[0] == '.';
  if (str.size() == 3) {
    const bool hex_e = str[2] == 'e' || str[2] == 'E';
    return str[0] == '%' && str[1] == '2' && hex_e;
  }
  return false;
}
762
763
// Double dot segment can be:
764
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
765
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
766
1280739
// True when `str` consists of exactly two dots, where each dot may be
// written either literally (".") or percent-encoded ("%2e" / "%2E").
bool IsDoubleDotSegment(const std::string& str) {
  // Whether the three characters starting at `pos` spell "%2e" or "%2E".
  const auto encoded_dot_at = [&str](size_t pos) {
    return str[pos] == '%' &&
           str[pos + 1] == '2' &&
           (str[pos + 2] == 'e' || str[pos + 2] == 'E');
  };

  switch (str.size()) {
    case 2:
      return str[0] == '.' && str[1] == '.';
    case 4:
      // One literal dot and one encoded dot, in either order.
      return (str[0] == '.' && encoded_dot_at(1)) ||
             (encoded_dot_at(0) && str[3] == '.');
    case 6:
      return encoded_dot_at(0) && encoded_dot_at(3);
    default:
      return false;
  }
}
792
793
11451
void ShortenUrlPath(struct url_data* url) {
794
11451
  if (url->path.empty()) return;
795


11655
  if (url->path.size() == 1 && url->scheme == "file:" &&
796
610
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
797
11045
  url->path.pop_back();
798
}
799
800
}  // anonymous namespace
801
802
231560
void URL::Parse(const char* input,
803
                size_t len,
804
                enum url_parse_state state_override,
805
                struct url_data* url,
806
                bool has_url,
807
                const struct url_data* base,
808
                bool has_base) {
809
231560
  const char* p = input;
810
231560
  const char* end = input + len;
811
812
231560
  if (!has_url) {
813
151799
    for (const char* ptr = p; ptr < end; ptr++) {
814
151780
      if (IsC0ControlOrSpace(*ptr))
815
56
        p++;
816
      else
817
151724
        break;
818
    }
819
151791
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
820
151772
      if (IsC0ControlOrSpace(*ptr))
821
48
        end--;
822
      else
823
151724
        break;
824
    }
825
151743
    input = p;
826
151743
    len = end - p;
827
  }
828
829
  // The spec says we should strip out any ASCII tabs or newlines.
830
  // In those cases, we create another std::string instance with the filtered
831
  // contents, but in the general case we avoid the overhead.
832
231560
  std::string whitespace_stripped;
833
14649018
  for (const char* ptr = p; ptr < end; ptr++) {
834
14417628
    if (!IsASCIITabOrNewline(*ptr))
835
14417458
      continue;
836
    // Hit tab or newline. Allocate storage, copy what we have until now,
837
    // and then iterate and filter all similar characters out.
838
170
    whitespace_stripped.reserve(len - 1);
839
170
    whitespace_stripped.assign(p, ptr - p);
840
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
841
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
842
853
      if (!IsASCIITabOrNewline(*ptr))
843
769
        whitespace_stripped += *ptr;
844
    }
845
846
    // Update variables like they should have looked like if the string
847
    // had been stripped of whitespace to begin with.
848
170
    input = whitespace_stripped.c_str();
849
170
    len = whitespace_stripped.size();
850
170
    p = input;
851
170
    end = input + len;
852
170
    break;
853
  }
854
855
231560
  bool atflag = false;  // Set when @ has been seen.
856
231560
  bool square_bracket_flag = false;  // Set inside of [...]
857
231560
  bool password_token_seen_flag = false;  // Set after a : after an username.
858
859
231560
  std::string buffer;
860
861
  // Set the initial parse state.
862
231560
  const bool has_state_override = state_override != kUnknownState;
863
231560
  enum url_parse_state state = has_state_override ? state_override :
864
                                                    kSchemeStart;
865
866

231560
  if (state < kSchemeStart || state > kFragment) {
867
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
868
    return;
869
  }
870
871
15189671
  while (p <= end) {
872
14967074
    const char ch = p < end ? p[0] : kEOL;
873
14967074
    bool special = (url->flags & URL_FLAGS_SPECIAL);
874
    bool cannot_be_base;
875

14967074
    bool special_back_slash = (special && ch == '\\');
876
877





14967074
    switch (state) {
878
151826
      case kSchemeStart:
879
151826
        if (IsASCIIAlpha(ch)) {
880
138452
          buffer += ASCIILowercase(ch);
881
138452
          state = kScheme;
882
13374
        } else if (!has_state_override) {
883
13364
          state = kNoScheme;
884
13364
          continue;
885
        } else {
886
10
          url->flags |= URL_FLAGS_FAILED;
887
10
          return;
888
        }
889
138452
        break;
890
572816
      case kScheme:
891


572816
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
892
434364
          buffer += ASCIILowercase(ch);
893

138452
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
894

136413
          if (has_state_override && buffer.size() == 0) {
895
            url->flags |= URL_FLAGS_TERMINATED;
896
            return;
897
          }
898
136413
          buffer += ':';
899
900
136413
          bool new_is_special = IsSpecial(buffer);
901
902
136413
          if (has_state_override) {
903
45
            if ((special != new_is_special) ||
904
45
                ((buffer == "file:") &&
905
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
906
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
907


116
                  (url->port != -1))) ||
908
45
                  (url->scheme == "file:" && url->host.empty())) {
909
32
              url->flags |= URL_FLAGS_TERMINATED;
910
32
              return;
911
            }
912
          }
913
914
136381
          url->scheme = std::move(buffer);
915
136381
          url->port = NormalizePort(url->scheme, url->port);
916
136381
          if (new_is_special) {
917
130500
            url->flags |= URL_FLAGS_SPECIAL;
918
130500
            special = true;
919
          } else {
920
5881
            url->flags &= ~URL_FLAGS_SPECIAL;
921
5881
            special = false;
922
          }
923
          // `special_back_slash` equals to `(special && ch == '\\')` and `ch`
924
          // here always not equals to `\\`. So `special_back_slash` here always
925
          // equals to `false`.
926
136381
          special_back_slash = false;
927
136381
          buffer.clear();
928
136381
          if (has_state_override)
929
33
            return;
930
136348
          if (url->scheme == "file:") {
931
126083
            state = kFile;
932
4400
          } else if (special &&
933

14665
                     has_base &&
934
1039
                     url->scheme == base->scheme) {
935
329
            state = kSpecialRelativeOrAuthority;
936
9936
          } else if (special) {
937
4071
            state = kSpecialAuthoritySlashes;
938

5865
          } else if (p + 1 < end && p[1] == '/') {
939
716
            state = kPathOrAuthority;
940
716
            p++;
941
          } else {
942
5149
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
943
5149
            url->flags |= URL_FLAGS_HAS_PATH;
944
5149
            url->path.emplace_back("");
945
5149
            state = kCannotBeBase;
946
136348
          }
947
2039
        } else if (!has_state_override) {
948
2031
          buffer.clear();
949
2031
          state = kNoScheme;
950
2031
          p = input;
951
2031
          continue;
952
        } else {
953
8
          url->flags |= URL_FLAGS_FAILED;
954
8
          return;
955
        }
956
570712
        break;
957
15395
      case kNoScheme:
958

15395
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
959

15395
        if (!has_base || (cannot_be_base && ch != '#')) {
960
7979
          url->flags |= URL_FLAGS_FAILED;
961
7979
          return;
962

7416
        } else if (cannot_be_base && ch == '#') {
963
28
          url->scheme = base->scheme;
964
28
          if (IsSpecial(url->scheme)) {
965
            url->flags |= URL_FLAGS_SPECIAL;
966
            special = true;
967
          } else {
968
28
            url->flags &= ~URL_FLAGS_SPECIAL;
969
28
            special = false;
970
          }
971

28
          special_back_slash = (special && ch == '\\');
972
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
973
28
            url->flags |= URL_FLAGS_HAS_PATH;
974
28
            url->path = base->path;
975
          }
976
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
977
4
            url->flags |= URL_FLAGS_HAS_QUERY;
978
4
            url->query = base->query;
979
          }
980
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
981
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
982
            url->fragment = base->fragment;
983
          }
984
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
985
28
          state = kFragment;
986

14776
        } else if (has_base &&
987
7388
                   base->scheme != "file:") {
988
536
          state = kRelative;
989
536
          continue;
990
        } else {
991
6852
          url->scheme = "file:";
992
6852
          url->flags |= URL_FLAGS_SPECIAL;
993
6852
          special = true;
994
6852
          state = kFile;
995

6852
          special_back_slash = (special && ch == '\\');
996
6852
          continue;
997
        }
998
28
        break;
999
329
      case kSpecialRelativeOrAuthority:
1000

329
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1001
297
          state = kSpecialAuthorityIgnoreSlashes;
1002
297
          p++;
1003
        } else {
1004
32
          state = kRelative;
1005
32
          continue;
1006
        }
1007
297
        break;
1008
716
      case kPathOrAuthority:
1009
716
        if (ch == '/') {
1010
548
          state = kAuthority;
1011
        } else {
1012
168
          state = kPath;
1013
168
          continue;
1014
        }
1015
548
        break;
1016
568
      case kRelative:
1017
568
        url->scheme = base->scheme;
1018
568
        if (IsSpecial(url->scheme)) {
1019
468
          url->flags |= URL_FLAGS_SPECIAL;
1020
468
          special = true;
1021
        } else {
1022
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1023
100
          special = false;
1024
        }
1025

568
        special_back_slash = (special && ch == '\\');
1026

568
        switch (ch) {
1027
18
          case kEOL:
1028
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1029
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1030
4
              url->username = base->username;
1031
            }
1032
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1033
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1034
4
              url->password = base->password;
1035
            }
1036
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1037
16
              url->flags |= URL_FLAGS_HAS_HOST;
1038
16
              url->host = base->host;
1039
            }
1040
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1041
              url->flags |= URL_FLAGS_HAS_QUERY;
1042
              url->query = base->query;
1043
            }
1044
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1045
18
              url->flags |= URL_FLAGS_HAS_PATH;
1046
18
              url->path = base->path;
1047
            }
1048
18
            url->port = base->port;
1049
18
            break;
1050
275
          case '/':
1051
275
            state = kRelativeSlash;
1052
275
            break;
1053
38
          case '?':
1054
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1055
              url->flags |= URL_FLAGS_HAS_USERNAME;
1056
              url->username = base->username;
1057
            }
1058
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1059
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1060
              url->password = base->password;
1061
            }
1062
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1063
34
              url->flags |= URL_FLAGS_HAS_HOST;
1064
34
              url->host = base->host;
1065
            }
1066
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1067
38
              url->flags |= URL_FLAGS_HAS_PATH;
1068
38
              url->path = base->path;
1069
            }
1070
38
            url->port = base->port;
1071
38
            state = kQuery;
1072
38
            break;
1073
38
          case '#':
1074
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1075
              url->flags |= URL_FLAGS_HAS_USERNAME;
1076
              url->username = base->username;
1077
            }
1078
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1079
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1080
              url->password = base->password;
1081
            }
1082
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1083
34
              url->flags |= URL_FLAGS_HAS_HOST;
1084
34
              url->host = base->host;
1085
            }
1086
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1087
              url->flags |= URL_FLAGS_HAS_QUERY;
1088
              url->query = base->query;
1089
            }
1090
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1091
38
              url->flags |= URL_FLAGS_HAS_PATH;
1092
38
              url->path = base->path;
1093
            }
1094
38
            url->port = base->port;
1095
38
            state = kFragment;
1096
38
            break;
1097
199
          default:
1098
199
            if (special_back_slash) {
1099
18
              state = kRelativeSlash;
1100
            } else {
1101
181
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1102
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1103
1
                url->username = base->username;
1104
              }
1105
181
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1106
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1107
1
                url->password = base->password;
1108
              }
1109
181
              if (base->flags & URL_FLAGS_HAS_HOST) {
1110
161
                url->flags |= URL_FLAGS_HAS_HOST;
1111
161
                url->host = base->host;
1112
              }
1113
181
              if (base->flags & URL_FLAGS_HAS_PATH) {
1114
181
                url->flags |= URL_FLAGS_HAS_PATH;
1115
181
                url->path = base->path;
1116
181
                ShortenUrlPath(url);
1117
              }
1118
181
              url->port = base->port;
1119
181
              state = kPath;
1120
181
              continue;
1121
            }
1122
        }
1123
387
        break;
1124
293
      case kRelativeSlash:
1125


293
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1126
22
          state = kSpecialAuthorityIgnoreSlashes;
1127
271
        } else if (ch == '/') {
1128
6
          state = kAuthority;
1129
        } else {
1130
265
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1131
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1132
8
            url->username = base->username;
1133
          }
1134
265
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1135
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1136
4
            url->password = base->password;
1137
          }
1138
265
          if (base->flags & URL_FLAGS_HAS_HOST) {
1139
257
            url->flags |= URL_FLAGS_HAS_HOST;
1140
257
            url->host = base->host;
1141
          }
1142
265
          url->port = base->port;
1143
265
          state = kPath;
1144
265
          continue;
1145
        }
1146
28
        break;
1147
4071
      case kSpecialAuthoritySlashes:
1148
4071
        state = kSpecialAuthorityIgnoreSlashes;
1149

4071
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1150
3922
          p++;
1151
        } else {
1152
149
          continue;
1153
        }
1154
3922
        break;
1155
4467
      case kSpecialAuthorityIgnoreSlashes:
1156

4467
        if (ch != '/' && ch != '\\') {
1157
4390
          state = kAuthority;
1158
4390
          continue;
1159
        }
1160
77
        break;
1161
101983
      case kAuthority:
1162
101983
        if (ch == '@') {
1163
565
          if (atflag) {
1164
41
            buffer.reserve(buffer.size() + 3);
1165
41
            buffer.insert(0, "%40");
1166
          }
1167
565
          atflag = true;
1168
565
          size_t blen = buffer.size();
1169

565
          if (blen > 0 && buffer[0] != ':') {
1170
469
            url->flags |= URL_FLAGS_HAS_USERNAME;
1171
          }
1172
6652
          for (size_t n = 0; n < blen; n++) {
1173
6087
            const char bch = buffer[n];
1174
6087
            if (bch == ':') {
1175
444
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1176
444
              if (!password_token_seen_flag) {
1177
428
                password_token_seen_flag = true;
1178
428
                continue;
1179
              }
1180
            }
1181
5659
            if (password_token_seen_flag) {
1182
2722
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1183
            } else {
1184
2937
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1185
            }
1186
          }
1187
565
          buffer.clear();
1188

101418
        } else if (ch == kEOL ||
1189
96544
                   ch == '/' ||
1190
96512
                   ch == '?' ||
1191
96494
                   ch == '#' ||
1192
                   special_back_slash) {
1193

4944
          if (atflag && buffer.size() == 0) {
1194
52
            url->flags |= URL_FLAGS_FAILED;
1195
52
            return;
1196
          }
1197
4892
          p -= buffer.size() + 1;
1198
4892
          buffer.clear();
1199
4892
          state = kHost;
1200
        } else {
1201
96474
          buffer += ch;
1202
        }
1203
101931
        break;
1204
92500
      case kHost:
1205
      case kHostname:
1206

92500
        if (has_state_override && url->scheme == "file:") {
1207
12
          state = kFileHost;
1208
12
          continue;
1209

92488
        } else if (ch == ':' && !square_bracket_flag) {
1210
1850
          if (buffer.size() == 0) {
1211
24
            url->flags |= URL_FLAGS_FAILED;
1212
24
            return;
1213
          }
1214
1826
          if (state_override == kHostname) {
1215
4
            return;
1216
          }
1217
1822
          url->flags |= URL_FLAGS_HAS_HOST;
1218
1822
          if (!ParseHost(buffer, &url->host, special)) {
1219
5
            url->flags |= URL_FLAGS_FAILED;
1220
5
            return;
1221
          }
1222
1817
          buffer.clear();
1223
1817
          state = kPort;
1224

90638
        } else if (ch == kEOL ||
1225
87306
                   ch == '/' ||
1226
87266
                   ch == '?' ||
1227
87240
                   ch == '#' ||
1228
                   special_back_slash) {
1229
3422
          p--;
1230

3422
          if (special && buffer.size() == 0) {
1231
21
            url->flags |= URL_FLAGS_FAILED;
1232
21
            return;
1233
          }
1234
331
          if (has_state_override &&
1235

3770
              buffer.size() == 0 &&
1236
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1237
38
               url->port != -1)) {
1238
8
            url->flags |= URL_FLAGS_TERMINATED;
1239
8
            return;
1240
          }
1241
3393
          url->flags |= URL_FLAGS_HAS_HOST;
1242
3393
          if (!ParseHost(buffer, &url->host, special)) {
1243
432
            url->flags |= URL_FLAGS_FAILED;
1244
432
            return;
1245
          }
1246
2961
          buffer.clear();
1247
2961
          state = kPathStart;
1248
2961
          if (has_state_override) {
1249
227
            return;
1250
          }
1251
        } else {
1252
87216
          if (ch == '[')
1253
349
            square_bracket_flag = true;
1254
87216
          if (ch == ']')
1255
345
            square_bracket_flag = false;
1256
87216
          buffer += ch;
1257
        }
1258
91767
        break;
1259
9917
      case kPort:
1260
9917
        if (IsASCIIDigit(ch)) {
1261
8031
          buffer += ch;
1262

1886
        } else if (has_state_override ||
1263
1134
                   ch == kEOL ||
1264
36
                   ch == '/' ||
1265
36
                   ch == '?' ||
1266
36
                   ch == '#' ||
1267
                   special_back_slash) {
1268
1850
          if (buffer.size() > 0) {
1269
1836
            unsigned port = 0;
1270
            // the condition port <= 0xffff prevents integer overflow
1271

9651
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1272
7815
              port = port * 10 + buffer[i] - '0';
1273
1836
            if (port > 0xffff) {
1274
              // TODO(TimothyGu): This hack is currently needed for the host
1275
              // setter since it needs access to hostname if it is valid, and
1276
              // if the FAILED flag is set the entire response to JS layer
1277
              // will be empty.
1278
26
              if (state_override == kHost)
1279
2
                url->port = -1;
1280
              else
1281
24
                url->flags |= URL_FLAGS_FAILED;
1282
26
              return;
1283
            }
1284
            // the port is valid
1285
1810
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1286
1810
            if (url->port == -1)
1287
257
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1288
1810
            buffer.clear();
1289
14
          } else if (has_state_override) {
1290
            // TODO(TimothyGu): Similar case as above.
1291
6
            if (state_override == kHost)
1292
2
              url->port = -1;
1293
            else
1294
4
              url->flags |= URL_FLAGS_TERMINATED;
1295
6
            return;
1296
          }
1297
1818
          state = kPathStart;
1298
1818
          continue;
1299
        } else {
1300
36
          url->flags |= URL_FLAGS_FAILED;
1301
36
          return;
1302
        }
1303
8031
        break;
1304
132935
      case kFile:
1305
132935
        url->scheme = "file:";
1306
132935
        url->host.clear();
1307
132935
        url->flags |= URL_FLAGS_HAS_HOST;
1308

132935
        if (ch == '/' || ch == '\\') {
1309
126201
          state = kFileSlash;
1310

6734
        } else if (has_base && base->scheme == "file:") {
1311

6715
          switch (ch) {
1312
4
            case kEOL:
1313
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1314
4
                url->host = base->host;
1315
              }
1316
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1317
4
                url->flags |= URL_FLAGS_HAS_PATH;
1318
4
                url->path = base->path;
1319
              }
1320
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1321
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1322
4
                url->query = base->query;
1323
              }
1324
4
              break;
1325
4
            case '?':
1326
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1327
4
                url->host = base->host;
1328
              }
1329
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1330
4
                url->flags |= URL_FLAGS_HAS_PATH;
1331
4
                url->path = base->path;
1332
              }
1333
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1334
4
              url->query.clear();
1335
4
              state = kQuery;
1336
4
              break;
1337
4
            case '#':
1338
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1339
4
                url->host = base->host;
1340
              }
1341
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1342
4
                url->flags |= URL_FLAGS_HAS_PATH;
1343
4
                url->path = base->path;
1344
              }
1345
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1346
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1347
4
                url->query = base->query;
1348
              }
1349
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1350
4
              url->fragment.clear();
1351
4
              state = kFragment;
1352
4
              break;
1353
6703
            default:
1354
6703
              url->query.clear();
1355
6703
              if (base->flags & URL_FLAGS_HAS_HOST) {
1356
6703
                url->host = base->host;
1357
              }
1358
6703
              if (base->flags & URL_FLAGS_HAS_PATH) {
1359
6703
                url->flags |= URL_FLAGS_HAS_PATH;
1360
6703
                url->path = base->path;
1361
              }
1362
6703
              if (!StartsWithWindowsDriveLetter(p, end)) {
1363
6679
                ShortenUrlPath(url);
1364
              } else {
1365
24
                url->path.clear();
1366
              }
1367
6703
              state = kPath;
1368
6703
              continue;
1369
          }
1370
        } else {
1371
19
          state = kPath;
1372
19
          continue;
1373
        }
1374
126213
        break;
1375
126201
      case kFileSlash:
1376

126201
        if (ch == '/' || ch == '\\') {
1377
126059
          state = kFileHost;
1378
        } else {
1379

142
          if (has_base && base->scheme == "file:") {
1380
128
            url->flags |= URL_FLAGS_HAS_HOST;
1381
128
            url->host = base->host;
1382

242
            if (!StartsWithWindowsDriveLetter(p, end) &&
1383
114
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1384
4
              url->flags |= URL_FLAGS_HAS_PATH;
1385
4
              url->path.push_back(base->path[0]);
1386
            }
1387
          }
1388
142
          state = kPath;
1389
142
          continue;
1390
        }
1391
126059
        break;
1392
127176
      case kFileHost:
1393

127176
        if (ch == kEOL ||
1394
1115
            ch == '/' ||
1395
1105
            ch == '\\' ||
1396
1105
            ch == '?' ||
1397
            ch == '#') {
1398
126059
          if (!has_state_override &&
1399

252130
              buffer.size() == 2 &&
1400
22
              IsWindowsDriveLetter(buffer)) {
1401
12
            state = kPath;
1402
126059
          } else if (buffer.size() == 0) {
1403
125866
            url->flags |= URL_FLAGS_HAS_HOST;
1404
125866
            url->host.clear();
1405
125866
            if (has_state_override)
1406
4
              return;
1407
125862
            state = kPathStart;
1408
          } else {
1409
193
            std::string host;
1410
193
            if (!ParseHost(buffer, &host, special)) {
1411
52
              url->flags |= URL_FLAGS_FAILED;
1412
52
              return;
1413
            }
1414
141
            if (host == "localhost")
1415
37
              host.clear();
1416
141
            url->flags |= URL_FLAGS_HAS_HOST;
1417
141
            url->host = host;
1418
141
            if (has_state_override)
1419
4
              return;
1420
137
            buffer.clear();
1421
137
            state = kPathStart;
1422
          }
1423
126011
          continue;
1424
        } else {
1425
1105
          buffer += ch;
1426
        }
1427
1105
        break;
1428
209591
      case kPathStart:
1429
209591
        if (IsSpecial(url->scheme)) {
1430
209043
          state = kPath;
1431

209043
          if (ch != '/' && ch != '\\') {
1432
80129
            continue;
1433
          }
1434

548
        } else if (!has_state_override && ch == '?') {
1435
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1436
6
          url->query.clear();
1437
6
          state = kQuery;
1438

542
        } else if (!has_state_override && ch == '#') {
1439
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1440
6
          url->fragment.clear();
1441
6
          state = kFragment;
1442
536
        } else if (ch != kEOL) {
1443
459
          state = kPath;
1444
459
          if (ch != '/') {
1445
35
            continue;
1446
          }
1447

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1448
2
          url->flags |= URL_FLAGS_HAS_PATH;
1449
2
          url->path.emplace_back("");
1450
        }
1451
129427
        break;
1452
13301643
      case kPath:
1453

13301643
        if (ch == kEOL ||
1454
12021978
            ch == '/' ||
1455
12021908
            special_back_slash ||
1456

12021908
            (!has_state_override && (ch == '?' || ch == '#'))) {
1457
1280739
          if (IsDoubleDotSegment(buffer)) {
1458
4591
            ShortenUrlPath(url);
1459

4591
            if (ch != '/' && !special_back_slash) {
1460
280
              url->flags |= URL_FLAGS_HAS_PATH;
1461
280
              url->path.emplace_back("");
1462
            }
1463
1279292
          } else if (IsSingleDotSegment(buffer) &&
1464

1279292
                     ch != '/' && !special_back_slash) {
1465
717
            url->flags |= URL_FLAGS_HAS_PATH;
1466
717
            url->path.emplace_back("");
1467
1275431
          } else if (!IsSingleDotSegment(buffer)) {
1468
2538990
            if (url->scheme == "file:" &&
1469
1435930
                url->path.empty() &&
1470

2708934
                buffer.size() == 2 &&
1471
100
                IsWindowsDriveLetter(buffer)) {
1472
98
              buffer[1] = ':';
1473
            }
1474
1273004
            url->flags |= URL_FLAGS_HAS_PATH;
1475
1273004
            url->path.emplace_back(std::move(buffer));
1476
          }
1477
1280739
          buffer.clear();
1478
2561478
          if (ch == '?') {
1479
948
            url->flags |= URL_FLAGS_HAS_QUERY;
1480
948
            url->query.clear();
1481
948
            state = kQuery;
1482
1279791
          } else if (ch == '#') {
1483
56
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1484
56
            url->fragment.clear();
1485
56
            state = kFragment;
1486
          }
1487
        } else {
1488
12020904
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1489
        }
1490
13301643
        break;
1491
50753
      case kCannotBeBase:
1492
50753
        switch (ch) {
1493
6
          case '?':
1494
6
            state = kQuery;
1495
6
            break;
1496
10
          case '#':
1497
10
            state = kFragment;
1498
10
            break;
1499
50737
          default:
1500
50737
            if (url->path.empty())
1501
              url->path.emplace_back("");
1502
50737
            else if (ch != kEOL)
1503
45604
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1504
        }
1505
50753
        break;
1506
59629
      case kQuery:
1507

59629
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1508
1161
          url->flags |= URL_FLAGS_HAS_QUERY;
1509
1161
          url->query = std::move(buffer);
1510
1161
          buffer.clear();
1511
1551
          if (ch == '#')
1512
390
            state = kFragment;
1513
        } else {
1514
58468
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1515
                                                QUERY_ENCODE_SET_NONSPECIAL);
1516
        }
1517
59629
        break;
1518
4265
      case kFragment:
1519
4265
        switch (ch) {
1520
606
          case kEOL:
1521
606
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1522
606
            url->fragment = std::move(buffer);
1523
606
            break;
1524
3659
          default:
1525
3659
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1526
        }
1527
4265
        break;
1528
      default:
1529
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1530
        return;
1531
    }
1532
1533
14715274
    p++;
1534
  }
1535
}  // NOLINT(readability/fn_size)
1536
1537
// https://url.spec.whatwg.org/#url-serializing
1538
35678
std::string URL::SerializeURL(const url_data& url,
1539
                              bool exclude = false) {
1540
35678
  std::string output;
1541
35678
  output.reserve(
1542
    10 +  // We generally insert < 10 separator characters between URL parts
1543
35678
    url.scheme.size() +
1544
35678
    url.username.size() +
1545
35678
    url.password.size() +
1546
35678
    url.host.size() +
1547
35678
    url.query.size() +
1548
35678
    url.fragment.size() +
1549
35678
    url.href.size() +
1550
35678
    std::accumulate(
1551
        url.path.begin(),
1552
        url.path.end(),
1553
        0,
1554
376796
        [](size_t sum, const auto& str) { return sum + str.size(); }));
1555
1556
35678
  output += url.scheme;
1557
35678
  if (url.flags & URL_FLAGS_HAS_HOST) {
1558
35678
    output += "//";
1559
35678
    if (url.flags & URL_FLAGS_HAS_USERNAME ||
1560
35678
        url.flags & URL_FLAGS_HAS_PASSWORD) {
1561
      if (url.flags & URL_FLAGS_HAS_USERNAME) {
1562
        output += url.username;
1563
      }
1564
      if (url.flags & URL_FLAGS_HAS_PASSWORD) {
1565
        output += ":" + url.password;
1566
      }
1567
      output += "@";
1568
    }
1569
35678
    output += url.host;
1570
35678
    if (url.port != -1) {
1571
      output += ":" + std::to_string(url.port);
1572
    }
1573
  }
1574
35678
  if (url.flags & URL_FLAGS_CANNOT_BE_BASE) {
1575
    output += url.path[0];
1576
  } else {
1577
    if (!(url.flags & URL_FLAGS_HAS_HOST) &&
1578

35678
          url.path.size() > 1 &&
1579
          url.path[0].empty()) {
1580
      output += "/.";
1581
    }
1582
376796
    for (size_t i = 1; i < url.path.size(); i++) {
1583
341118
      output += "/" + url.path[i];
1584
    }
1585
  }
1586
35678
  if (url.flags & URL_FLAGS_HAS_QUERY) {
1587
    output += "?" + url.query;
1588
  }
1589

35678
  if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) {
1590
    output += "#" + url.fragment;
1591
  }
1592
35678
  output.shrink_to_fit();
1593
35678
  return output;
1594
}
1595
1596
namespace {
1597
151492
void SetArgs(Environment* env,
1598
             Local<Value> argv[ARG_COUNT],
1599
             const struct url_data& url) {
1600
151492
  Isolate* isolate = env->isolate();
1601
151492
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1602
302984
  argv[ARG_PROTOCOL] =
1603
151492
      url.flags & URL_FLAGS_SPECIAL ?
1604
145386
          GetSpecial(env, url.scheme) :
1605
6106
          OneByteString(isolate, url.scheme.c_str());
1606
151492
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1607
1224
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1608
151492
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1609
1184
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1610
151492
  if (url.flags & URL_FLAGS_HAS_HOST)
1611
292164
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1612
151492
  if (url.flags & URL_FLAGS_HAS_QUERY)
1613
2338
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1614
151492
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1615
1204
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1616
151492
  if (url.port > -1)
1617
3484
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1618
151492
  if (url.flags & URL_FLAGS_HAS_PATH)
1619
301792
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1620
151492
}
1621
1622
160175
void Parse(Environment* env,
1623
           Local<Value> recv,
1624
           const char* input,
1625
           size_t len,
1626
           enum url_parse_state state_override,
1627
           Local<Value> base_obj,
1628
           Local<Value> context_obj,
1629
           Local<Function> cb,
1630
           Local<Value> error_cb) {
1631
160175
  Isolate* isolate = env->isolate();
1632
160175
  Local<Context> context = env->context();
1633
160175
  HandleScope handle_scope(isolate);
1634
160175
  Context::Scope context_scope(context);
1635
1636
160175
  const bool has_context = context_obj->IsObject();
1637
160175
  const bool has_base = base_obj->IsObject();
1638
1639
160175
  url_data base;
1640
160175
  url_data url;
1641
160175
  if (has_context)
1642
44139
    url = HarvestContext(env, context_obj.As<Object>());
1643
160175
  if (has_base)
1644
9096
    base = HarvestBase(env, base_obj.As<Object>());
1645
1646
160175
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1647

160175
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1648
44139
      ((state_override != kUnknownState) &&
1649
44139
       (url.flags & URL_FLAGS_TERMINATED)))
1650
44
    return;
1651
1652
  // Define the return value placeholders
1653
160131
  const Local<Value> undef = Undefined(isolate);
1654
160131
  const Local<Value> null = Null(isolate);
1655
160131
  if (!(url.flags & URL_FLAGS_FAILED)) {
1656
    Local<Value> argv[] = {
1657
      undef,
1658
      undef,
1659
      undef,
1660
      undef,
1661
      null,  // host defaults to null
1662
      null,  // port defaults to null
1663
      undef,
1664
      null,  // query defaults to null
1665
      null,  // fragment defaults to null
1666
151492
    };
1667
151492
    SetArgs(env, argv, url);
1668
151492
    USE(cb->Call(context, recv, arraysize(argv), argv));
1669
8639
  } else if (error_cb->IsFunction()) {
1670
17018
    Local<Value> flags = Integer::NewFromUnsigned(isolate, url.flags);
1671
8509
    USE(error_cb.As<Function>()->Call(context, recv, 1, &flags));
1672
  }
1673
}
1674
1675
160175
void Parse(const FunctionCallbackInfo<Value>& args) {
1676
160175
  Environment* env = Environment::GetCurrent(args);
1677
160175
  CHECK_GE(args.Length(), 5);
1678
320350
  CHECK(args[0]->IsString());  // input
1679


435916
  CHECK(args[2]->IsUndefined() ||  // base context
1680
        args[2]->IsNull() ||
1681
        args[2]->IsObject());
1682


452767
  CHECK(args[3]->IsUndefined() ||  // context
1683
        args[3]->IsNull() ||
1684
        args[3]->IsObject());
1685
160175
  CHECK(args[4]->IsFunction());  // complete callback
1686

436386
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1687
1688
160175
  Utf8Value input(env->isolate(), args[0]);
1689
160175
  enum url_parse_state state_override = kUnknownState;
1690
160175
  if (args[1]->IsNumber()) {
1691
160175
    state_override = static_cast<enum url_parse_state>(
1692
320350
        args[1]->Uint32Value(env->context()).FromJust());
1693
  }
1694
1695
320350
  Parse(env, args.This(),
1696
160175
        *input, input.length(),
1697
        state_override,
1698
        args[2],
1699
        args[3],
1700
320350
        args[4].As<Function>(),
1701
        args[5]);
1702
160175
}
1703
1704
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1705
92
  Environment* env = Environment::GetCurrent(args);
1706
92
  CHECK_GE(args.Length(), 1);
1707
184
  CHECK(args[0]->IsString());
1708
184
  Utf8Value value(env->isolate(), args[0]);
1709
92
  std::string output;
1710
92
  size_t len = value.length();
1711
92
  output.reserve(len);
1712
756
  for (size_t n = 0; n < len; n++) {
1713
664
    const char ch = (*value)[n];
1714
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1715
  }
1716
276
  args.GetReturnValue().Set(
1717
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1718
92
}
1719
1720
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1721
229
  Environment* env = Environment::GetCurrent(args);
1722
229
  CHECK_GE(args.Length(), 1);
1723
458
  CHECK(args[0]->IsString());
1724
229
  Utf8Value value(env->isolate(), args[0]);
1725
1726
229
  URLHost host;
1727
  // Assuming the host is used for a special scheme.
1728
229
  host.ParseHost(*value, value.length(), true);
1729
229
  if (host.ParsingFailed()) {
1730
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1731
12
    return;
1732
  }
1733
217
  std::string out = host.ToStringMove();
1734
651
  args.GetReturnValue().Set(
1735
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1736
}
1737
1738
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1739
207
  Environment* env = Environment::GetCurrent(args);
1740
207
  CHECK_GE(args.Length(), 1);
1741
414
  CHECK(args[0]->IsString());
1742
207
  Utf8Value value(env->isolate(), args[0]);
1743
1744
207
  URLHost host;
1745
  // Assuming the host is used for a special scheme.
1746
207
  host.ParseHost(*value, value.length(), true, true);
1747
207
  if (host.ParsingFailed()) {
1748
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1749
12
    return;
1750
  }
1751
195
  std::string out = host.ToStringMove();
1752
585
  args.GetReturnValue().Set(
1753
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1754
}
1755
1756
852
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1757
852
  Environment* env = Environment::GetCurrent(args);
1758
852
  CHECK_EQ(args.Length(), 1);
1759
852
  CHECK(args[0]->IsFunction());
1760
1704
  env->set_url_constructor_function(args[0].As<Function>());
1761
852
}
1762
1763
852
void Initialize(Local<Object> target,
1764
                Local<Value> unused,
1765
                Local<Context> context,
1766
                void* priv) {
1767
852
  Environment* env = Environment::GetCurrent(context);
1768
852
  env->SetMethod(target, "parse", Parse);
1769
852
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1770
852
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1771
852
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1772
852
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1773
1774
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1775
23004
  FLAGS(XX)
1776
#undef XX
1777
1778
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1779
35784
  PARSESTATES(XX)
1780
#undef XX
1781
852
}
1782
}  // namespace
1783
1784
5201
// Registers each native callback of this module with `registry`. The list
// mirrors the functions installed by Initialize.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  ExternalReferenceRegistry& references = *registry;

  references.Register(Parse);
  references.Register(EncodeAuthSet);
  references.Register(DomainToASCII);
  references.Register(DomainToUnicode);
  references.Register(SetURLConstructor);
}
1791
1792
8
std::string URL::ToFilePath() const {
1793
8
  if (context_.scheme != "file:") {
1794
1
    return "";
1795
  }
1796
1797
#ifdef _WIN32
1798
  const char* slash = "\\";
1799
  auto is_slash = [] (char ch) {
1800
    return ch == '/' || ch == '\\';
1801
  };
1802
#else
1803
7
  const char* slash = "/";
1804
46
  auto is_slash = [] (char ch) {
1805
46
    return ch == '/';
1806
  };
1807

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1808
7
      context_.host.length() > 0) {
1809
1
    return "";
1810
  }
1811
#endif
1812
12
  std::string decoded_path;
1813
18
  for (const std::string& part : context_.path) {
1814
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1815
58
    for (char& ch : decoded) {
1816
46
      if (is_slash(ch)) {
1817
1
        return "";
1818
      }
1819
    }
1820
12
    decoded_path += slash + decoded;
1821
  }
1822
1823
#ifdef _WIN32
1824
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1825
1826
  // If hostname is set, then we have a UNC path. Pass the hostname through
1827
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1828
  // need to worry about percent encoding because the URL parser will have
1829
  // already taken care of that for us. Note that this only causes IDNs with an
1830
  // appropriate `xn--` prefix to be decoded.
1831
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1832
      context_.host.length() > 0) {
1833
    std::string unicode_host;
1834
    if (!ToUnicode(context_.host, &unicode_host)) {
1835
      return "";
1836
    }
1837
    return "\\\\" + unicode_host + decoded_path;
1838
  }
1839
  // Otherwise, it's a local path that requires a drive letter.
1840
  if (decoded_path.length() < 3) {
1841
    return "";
1842
  }
1843
  if (decoded_path[2] != ':' ||
1844
      !IsASCIIAlpha(decoded_path[1])) {
1845
    return "";
1846
  }
1847
  // Strip out the leading '\'.
1848
  return decoded_path.substr(1);
1849
#else
1850
5
  return decoded_path;
1851
#endif
1852
}
1853
1854
35678
// Builds a file: URL from `file_path`. Every '%' in the path is written as
// "%25" before the text is parsed as the path of a "file://" URL, starting
// from the kPathStart parser state.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped_file_path;
  for (const char ch : file_path) {
    if (ch == '%') {
      escaped_file_path += "%25";
    } else {
      escaped_file_path += ch;
    }
  }

  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1866
1867
// This function works by calling out to a JS function that creates and
1868
// returns the JS URL object. Be mindful of the JS<->Native boundary
1869
// crossing that is required.
1870
// Creates the JavaScript URL object for this URL by calling the constructor
// function stored on `env`. Returns an empty MaybeLocal when this URL is
// flagged URL_FLAGS_FAILED.
MaybeLocal<Value> URL::ToObject(Environment* env) const {
  Isolate* isolate = env->isolate();
  Local<Context> context = env->context();
  Context::Scope context_scope(context);

  if (context_.flags & URL_FLAGS_FAILED) {
    return MaybeLocal<Value>();
  }

  // Placeholder values; SetArgs overwrites the slots for the components that
  // are present.
  const Local<Value> undefined_value = Undefined(isolate);
  const Local<Value> null_value = Null(isolate);
  Local<Value> argv[] = {
    undefined_value,
    undefined_value,
    undefined_value,
    undefined_value,
    null_value,  // host defaults to null
    null_value,  // port defaults to null
    undefined_value,
    null_value,  // query defaults to null
    null_value,  // fragment defaults to null
  };
  SetArgs(env, argv, context_);

  TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);

  // The SetURLConstructor method must have been called already to
  // set the constructor function used below. SetURLConstructor is
  // called automatically when the internal/url.js module is loaded
  // during the internal/bootstrap/node.js processing.
  return env->url_constructor_function()
      ->Call(context, undefined_value, arraysize(argv), argv);
}
1908
1909
}  // namespace url
1910
}  // namespace node
1911
1912
5269
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1913
5201
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)