GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1128 1186 95.1 %
Date: 2022-04-22 04:19:20 Branches: 970 1096 88.5 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <numeric>
11
#include <string>
12
#include <vector>
13
14
namespace node {
15
16
using errors::TryCatchScope;
17
18
using url::table_data::hex;
19
using url::table_data::C0_CONTROL_ENCODE_SET;
20
using url::table_data::FRAGMENT_ENCODE_SET;
21
using url::table_data::PATH_ENCODE_SET;
22
using url::table_data::USERINFO_ENCODE_SET;
23
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
24
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
25
26
using v8::Array;
27
using v8::Context;
28
using v8::Function;
29
using v8::FunctionCallbackInfo;
30
using v8::HandleScope;
31
using v8::Int32;
32
using v8::Integer;
33
using v8::Isolate;
34
using v8::Local;
35
using v8::MaybeLocal;
36
using v8::NewStringType;
37
using v8::Null;
38
using v8::Object;
39
using v8::String;
40
using v8::Undefined;
41
using v8::Value;
42
43
142455
// Builds a V8 string from the bytes of `str`, passing its data pointer and
// length to String::NewFromUtf8(). The resulting MaybeLocal is unwrapped with
// ToLocalChecked().
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
49
50
namespace url {
51
namespace {
52
53
// https://url.spec.whatwg.org/#eof-code-point
54
// Sentinel character standing in for the spec's EOF code point (it is named
// kEOL here). NOTE(review): -1 as a char compares equal to the byte 0xFF
// where char is signed -- confirm callers cannot mistake real input for it.
constexpr char kEOL = -1;
55
56
// https://url.spec.whatwg.org/#concept-host
57
// Parsed representation of a URL host. `type_` records which member of
// `value_` is currently live; H_FAILED means nothing is stored.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no host value is stored, i.e. `type_` is H_FAILED.
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    // Starts out with the `ipv4` member initialized to zero.
    Value() : ipv4(0) {}
    // Deliberately empty: URLHost::Reset() destroys the string member when
    // it is the live one.
    ~Value() {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // True when `value_.domain_or_opaque` is the live union member.
  bool HoldsString() const {
    return type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
  }

  // Destroys the string member if it is live and returns to H_FAILED.
  void Reset() {
    if (HoldsString()) {
      using StringType = std::string;
      value_.domain_or_opaque.~StringType();
    }
    type_ = HostType::H_FAILED;
  }

  // Setting the string members of the union with = is brittle because
  // it relies on them being initialized to a state that requires no
  // destruction of old data, so the string is always placement-constructed
  // after a Reset(). We might want to consider just not forcing strings
  // into a union.
  void SetString(HostType type, std::string&& str) {
    Reset();
    type_ = type;
    new (&value_.domain_or_opaque) std::string(std::move(str));
  }

  void SetOpaque(std::string&& string) {
    SetString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    SetString(HostType::H_DOMAIN, std::move(string));
  }
};
128
129
5279
// Delegates to Reset(), which destroys the string member when it is live.
URLHost::~URLHost() { Reset(); }
132
133
// X-macro listing the url_cb_args enumerators in order. ARG_COUNT comes last
// so that its value equals the number of entries before it.
#define ARGS(ENTRY)                                                           \
  ENTRY(ARG_FLAGS)                                                            \
  ENTRY(ARG_PROTOCOL)                                                         \
  ENTRY(ARG_USERNAME)                                                         \
  ENTRY(ARG_PASSWORD)                                                         \
  ENTRY(ARG_HOST)                                                             \
  ENTRY(ARG_PORT)                                                             \
  ENTRY(ARG_PATH)                                                             \
  ENTRY(ARG_QUERY)                                                            \
  ENTRY(ARG_FRAGMENT)                                                         \
  ENTRY(ARG_COUNT)  // This one has to be last.

enum url_cb_args {
#define DECLARE_ENUMERATOR(name) name,
  ARGS(DECLARE_ENUMERATOR)
#undef DECLARE_ENUMERATOR
};
150
151
// Generates two overloads of a predicate called `name`: one taking two
// characters (visible to `expr` as `ch1` and `ch2`), and one taking a
// std::basic_string that applies the first overload to its first two
// characters and yields false for strings shorter than two characters.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename CharT>                                                   \
  bool name(const CharT ch1, const CharT ch2) {                               \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }                                                                           \
  template <typename CharT>                                                   \
  bool name(const std::basic_string<CharT>& str) {                            \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be wider than " #bits " bits");             \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }
164
165
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
166

13797324
// Matches '\t', '\n' and '\r'.
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\r' || ch == '\n' || ch == '\t'))
167
168
// https://infra.spec.whatwg.org/#c0-control-or-space
169

291402
// Matches every character from '\0' up to and including ' '.
CHAR_TEST(8, IsC0ControlOrSpace, (ch <= ' ' && ch >= '\0'))
170
171
// https://infra.spec.whatwg.org/#ascii-digit
172

565625
// Matches '0' through '9'.
CHAR_TEST(8, IsASCIIDigit, ('0' <= ch && ch <= '9'))
173
174
// https://infra.spec.whatwg.org/#ascii-hex-digit
175


1078
// Matches decimal digits plus 'a'-'f' in either case.
CHAR_TEST(8, IsASCIIHexDigit, (('a' <= ch && ch <= 'f') ||
                               ('A' <= ch && ch <= 'F') ||
                               IsASCIIDigit(ch)))
178
179
// https://infra.spec.whatwg.org/#ascii-alpha
180


1251847
// Matches 'a'-'z' and 'A'-'Z'.
CHAR_TEST(8, IsASCIIAlpha, (('a' <= ch && ch <= 'z') ||
                            ('A' <= ch && ch <= 'Z')))
182
183
// https://infra.spec.whatwg.org/#ascii-alphanumeric
184

549619
// Matches anything accepted by IsASCIIAlpha() or IsASCIIDigit().
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIAlpha(ch) || IsASCIIDigit(ch)))
185
186
// https://infra.spec.whatwg.org/#ascii-lowercase
187
// Returns `ch` with bit 0x20 set when it is an ASCII letter, which maps
// 'A'-'Z' onto 'a'-'z'; every other character is returned unchanged.
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}
191
192
// https://url.spec.whatwg.org/#forbidden-host-code-point
193









90935
// The forbidden characters, listed in ascending character-code order.
CHAR_TEST(8, IsForbiddenHostCodePoint,
          (ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
           ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
           ch == ':' || ch == '<' || ch == '>' || ch == '?' ||
           ch == '@' || ch == '[' || ch == '\\' || ch == ']' ||
           ch == '^' || ch == '|'))
199
200
// https://url.spec.whatwg.org/#windows-drive-letter
201

12616
// An ASCII letter followed by ':' or '|'.
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     ((ch2 == '|' || ch2 == ':') && IsASCIIAlpha(ch1)))
203
204
// https://url.spec.whatwg.org/#normalized-windows-drive-letter
205

2712
// An ASCII letter followed by ':' only.
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (ch2 == ':' && IsASCIIAlpha(ch1)))
207
208
#undef TWO_CHAR_STRING_TEST
209
210
11616256
// Treats `a` as a bit array and reports whether bit `i` is set: the byte is
// selected by i / 8 and the bit within it by i % 8, least significant first.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i >> 3];
  const unsigned mask = 1u << (i & 7);
  return (byte & mask) != 0;
}
213
214
// Appends ch to str. If ch position in encode_set is set, the ch will
215
// be percent-encoded then appended.
216
11616256
void AppendOrEscape(std::string* str,
217
                    const unsigned char ch,
218
                    const uint8_t encode_set[]) {
219
11616256
  if (BitAt(encode_set, ch))
220
1931
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
221
  else
222
11614325
    *str += ch;
223
11616256
}
224
225
850
unsigned hex2bin(const char ch) {
226

850
  if (ch >= '0' && ch <= '9')
227
546
    return ch - '0';
228

304
  if (ch >= 'A' && ch <= 'F')
229
172
    return 10 + (ch - 'A');
230

132
  if (ch >= 'a' && ch <= 'f')
231
132
    return 10 + (ch - 'a');
232
  UNREACHABLE();
233
}
234
235
4407
std::string PercentDecode(const char* input, size_t len) {
236
4407
  std::string dest;
237
4407
  if (len == 0)
238
2
    return dest;
239
4405
  dest.reserve(len);
240
4405
  const char* pointer = input;
241
4405
  const char* end = input + len;
242
243
93890
  while (pointer < end) {
244
89485
    const char ch = pointer[0];
245
89485
    size_t remaining = end - pointer - 1;
246


89922
    if (ch != '%' || remaining < 2 ||
247
437
        (ch == '%' &&
248
437
         (!IsASCIIHexDigit(pointer[1]) ||
249
433
          !IsASCIIHexDigit(pointer[2])))) {
250
89060
      dest += ch;
251
89060
      pointer++;
252
89060
      continue;
253
    } else {
254
425
      unsigned a = hex2bin(pointer[1]);
255
425
      unsigned b = hex2bin(pointer[2]);
256
425
      char c = static_cast<char>(a * 16 + b);
257
425
      dest += c;
258
425
      pointer += 3;
259
    }
260
  }
261
4405
  return dest;
262
}
263
264
// X-macro over the special schemes. Each entry is
// (identifier, port number, scheme string including the trailing ':');
// "file:" is listed with port -1.
#define SPECIALS(ENTRY)                                                       \
  ENTRY(ftp, 21, "ftp:")                                                      \
  ENTRY(file, -1, "file:")                                                    \
  ENTRY(http, 80, "http:")                                                    \
  ENTRY(https, 443, "https:")                                                 \
  ENTRY(ws, 80, "ws:")                                                        \
  ENTRY(wss, 443, "wss:")
271
272
331049
bool IsSpecial(const std::string& scheme) {
273
#define V(_, __, name) if (scheme == name) return true;
274



331049
  SPECIALS(V);
275
#undef V
276
6537
  return false;
277
}
278
279
138772
// Returns env->url_special_<key>_string() for the SPECIALS entry whose scheme
// string equals `scheme`. A scheme that matches no entry hits UNREACHABLE().
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define RETURN_IF_MATCH(key, _, name)                                         \
  if (scheme == name) {                                                       \
    return env->url_special_##key##_string();                                 \
  }
  SPECIALS(RETURN_IF_MATCH)
#undef RETURN_IF_MATCH
  UNREACHABLE();
}
286
287
132217
int NormalizePort(const std::string& scheme, int p) {
288
#define V(_, port, name) if (scheme == name && p == port) return -1;
289









132217
  SPECIALS(V);
290
#undef V
291
11309
  return p;
292
}
293
294
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
295
6767
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
296
6767
  size_t length = end - p;
297
6064
  return length >= 2 &&
298

12867
    IsWindowsDriveLetter(p[0], p[1]) &&
299
36
    (length == 2 ||
300
36
      p[2] == '/' ||
301
14
      p[2] == '\\' ||
302
6
      p[2] == '?' ||
303
6771
      p[2] == '#');
304
}
305
306
#if defined(NODE_HAVE_I18N_SUPPORT)
307
195
bool ToUnicode(const std::string& input, std::string* output) {
308
390
  MaybeStackBuffer<char> buf;
309
195
  if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0)
310
    return false;
311
195
  output->assign(*buf, buf.length());
312
195
  return true;
313
}
314
315
4394
bool ToASCII(const std::string& input, std::string* output) {
316
8788
  MaybeStackBuffer<char> buf;
317
4394
  if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0)
318
124
    return false;
319
4270
  if (buf.length() == 0)
320
24
    return false;
321
4246
  output->assign(*buf, buf.length());
322
4246
  return true;
323
}
324
#else  // !defined(NODE_HAVE_I18N_SUPPORT)
325
// Intentional no-ops if ICU is not present.
326
// Build without ICU: copies `input` to `*output` unchanged and always
// succeeds. `output` may point at `input` itself.
bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
330
331
// Build without ICU: copies `input` to `*output` unchanged and always
// succeeds. `output` may point at `input` itself.
bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
335
#endif  // !defined(NODE_HAVE_I18N_SUPPORT)
336
337
#define NS_IN6ADDRSZ 16
338
339
357
void URLHost::ParseIPv6Host(const char* input, size_t length) {
340
357
  CHECK_EQ(type_, HostType::H_FAILED);
341
342
  unsigned char buf[sizeof(struct in6_addr)];
343
357
  MaybeStackBuffer<char> ipv6(length + 1);
344
357
  *(*ipv6 + length) = 0;
345
357
  memset(buf, 0, sizeof(buf));
346
357
  memcpy(*ipv6, input, sizeof(const char) * length);
347
348
357
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
349
350
357
  if (ret != 0) {
351
92
    return;
352
  }
353
354
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
355
2385
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
356
2120
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
357
  }
358
359
265
  type_ = HostType::H_IPV6;
360
}
361
362
4944
int64_t ParseNumber(const char* start, const char* end) {
363
4944
  unsigned R = 10;
364

4944
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
365
48
    start += 2;
366
48
    R = 16;
367
  }
368
4944
  if (end - start == 0) {
369
8
    return 0;
370

4936
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
371
55
    start++;
372
55
    R = 8;
373
  }
374
4936
  const char* p = start;
375
376
7451
  while (p < end) {
377
6237
    const char ch = p[0];
378

6237
    switch (R) {
379
274
      case 8:
380

274
        if (ch < '0' || ch > '7')
381
29
          return -1;
382
245
        break;
383
5755
      case 10:
384
5755
        if (!IsASCIIDigit(ch))
385
3689
          return -1;
386
2066
        break;
387
208
      case 16:
388
208
        if (!IsASCIIHexDigit(ch))
389
4
          return -1;
390
204
        break;
391
    }
392
2515
    p++;
393
  }
394
1214
  return strtoll(start, nullptr, R);
395
}
396
397
4071
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
398
4071
  CHECK_EQ(type_, HostType::H_FAILED);
399
4071
  *is_ipv4 = false;
400
4071
  const char* pointer = input;
401
4071
  const char* mark = input;
402
4071
  const char* end = pointer + length;
403
4071
  int parts = 0;
404
4071
  uint32_t val = 0;
405
  uint64_t numbers[4];
406
4071
  int tooBigNumbers = 0;
407
4071
  if (length == 0)
408
3766
    return;
409
410
37984
  while (pointer <= end) {
411
37655
    const char ch = pointer < end ? pointer[0] : kEOL;
412
37655
    int64_t remaining = end - pointer - 1;
413

37655
    if (ch == '.' || ch == kEOL) {
414
4960
      if (++parts > static_cast<int>(arraysize(numbers)))
415
4
        return;
416
4956
      if (pointer == mark)
417
12
        return;
418
4944
      int64_t n = ParseNumber(mark, pointer);
419
4944
      if (n < 0)
420
3722
        return;
421
422
1222
      if (n > 255) {
423
112
        tooBigNumbers++;
424
      }
425
1222
      numbers[parts - 1] = n;
426
1222
      mark = pointer + 1;
427

1222
      if (ch == '.' && remaining == 0)
428
4
        break;
429
    }
430
33913
    pointer++;
431
  }
432
333
  CHECK_GT(parts, 0);
433
333
  *is_ipv4 = true;
434
435
  // If any but the last item in numbers is greater than 255, return failure.
436
  // If the last item in numbers is greater than or equal to
437
  // 256^(5 - the number of items in numbers), return failure.
438
329
  if (tooBigNumbers > 1 ||
439

722
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
440
325
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
441
28
    return;
442
  }
443
444
305
  type_ = HostType::H_IPV4;
445
305
  val = static_cast<uint32_t>(numbers[parts - 1]);
446
1096
  for (int n = 0; n < parts - 1; n++) {
447
791
    double b = 3 - n;
448
791
    val +=
449
791
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
450
  }
451
452
305
  value_.ipv4 = val;
453
}
454
455
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
456
520
  CHECK_EQ(type_, HostType::H_FAILED);
457
520
  std::string output;
458
520
  output.reserve(length);
459
3053
  for (size_t i = 0; i < length; i++) {
460
2595
    const char ch = input[i];
461

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
462
62
      return;
463
    } else {
464
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
465
    }
466
  }
467
468
458
  SetOpaque(std::move(output));
469
}
470
471
5279
void URLHost::ParseHost(const char* input,
472
                        size_t length,
473
                        bool is_special,
474
                        bool unicode) {
475
5279
  CHECK_EQ(type_, HostType::H_FAILED);
476
5279
  const char* pointer = input;
477
478
5279
  if (length == 0)
479
1541
    return;
480
481
5279
  if (pointer[0] == '[') {
482
365
    if (pointer[length - 1] != ']')
483
8
      return;
484
357
    return ParseIPv6Host(++pointer, length - 2);
485
  }
486
487
4914
  if (!is_special)
488
520
    return ParseOpaqueHost(input, length);
489
490
  // First, we have to percent decode
491
4394
  std::string decoded = PercentDecode(input, length);
492
493
  // Then we have to punycode toASCII
494
4394
  if (!ToASCII(decoded, &decoded))
495
148
    return;
496
497
  // If any of the following characters are still present, we have to fail
498
92437
  for (size_t n = 0; n < decoded.size(); n++) {
499
88366
    const char ch = decoded[n];
500
88366
    if (IsForbiddenHostCodePoint(ch)) {
501
175
      return;
502
    }
503
  }
504
505
  // Check to see if it's an IPv4 IP address
506
  bool is_ipv4;
507
4071
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
508
4071
  if (is_ipv4)
509
333
    return;
510
511
  // If the unicode flag is set, run the result through punycode ToUnicode
512

3738
  if (unicode && !ToUnicode(decoded, &decoded))
513
    return;
514
515
  // It's not an IPv4 or IPv6 address, it must be a domain
516
3738
  SetDomain(std::move(decoded));
517
}
518
519
// Locates the longest sequence of 0 segments in an IPv6 address
520
// in order to use the :: compression when serializing
521
// Returns a pointer to the first element of the longest run of zeros in
// values[0..len), or nullptr when no run is longer than one element. When
// several runs share the maximum length the earliest one is returned.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best = nullptr;
  T* run_start = nullptr;
  unsigned run_length = 0;
  unsigned best_length = 1;  // A run must beat this to be reported.

  for (T* it = values; it < values + len; ++it) {
    if (*it == 0) {
      if (run_start == nullptr)
        run_start = it;
      ++run_length;
      continue;
    }
    // A non-zero element closes the current run, if there is one.
    if (run_length > best_length) {
      best_length = run_length;
      best = run_start;
    }
    run_length = 0;
    run_start = nullptr;
  }

  // The last run may extend to the end of the array.
  return run_length > best_length ? run_start : best;
}
549
550
4766
std::string URLHost::ToStringMove() {
551
4766
  std::string return_value;
552
4766
  switch (type_) {
553
4196
    case HostType::H_DOMAIN:
554
    case HostType::H_OPAQUE:
555
4196
      return_value = std::move(value_.domain_or_opaque);
556
4196
      break;
557
570
    default:
558
570
      return_value = ToString();
559
570
      break;
560
  }
561
4766
  Reset();
562
4766
  return return_value;
563
}
564
565
570
std::string URLHost::ToString() const {
566
1140
  std::string dest;
567

570
  switch (type_) {
568
    case HostType::H_DOMAIN:
569
    case HostType::H_OPAQUE:
570
      return value_.domain_or_opaque;
571
305
    case HostType::H_IPV4: {
572
305
      dest.reserve(15);
573
305
      uint32_t value = value_.ipv4;
574
1525
      for (int n = 0; n < 4; n++) {
575
1220
        dest.insert(0, std::to_string(value % 256));
576
1220
        if (n < 3)
577
915
          dest.insert(0, 1, '.');
578
1220
        value /= 256;
579
      }
580
305
      break;
581
    }
582
265
    case HostType::H_IPV6: {
583
265
      dest.reserve(41);
584
265
      dest += '[';
585
265
      const uint16_t* start = &value_.ipv6[0];
586
      const uint16_t* compress_pointer =
587
265
          FindLongestZeroSequence(start, 8);
588
265
      bool ignore0 = false;
589
2385
      for (int n = 0; n <= 7; n++) {
590
2120
        const uint16_t* piece = &value_.ipv6[n];
591

2120
        if (ignore0 && *piece == 0)
592
1785
          continue;
593
596
        else if (ignore0)
594
255
          ignore0 = false;
595
596
        if (compress_pointer == piece) {
596
261
          dest += n == 0 ? "::" : ":";
597
261
          ignore0 = true;
598
261
          continue;
599
        }
600
        char buf[5];
601
335
        snprintf(buf, sizeof(buf), "%x", *piece);
602
335
        dest += buf;
603
335
        if (n < 7)
604
76
          dest += ':';
605
      }
606
265
      dest += ']';
607
265
      break;
608
    }
609
    case HostType::H_FAILED:
610
      break;
611
  }
612
570
  return dest;
613
}
614
615
4937
bool ParseHost(const std::string& input,
616
               std::string* output,
617
               bool is_special,
618
               bool unicode = false) {
619
4937
  if (input.empty()) {
620
94
    output->clear();
621
94
    return true;
622
  }
623
9686
  URLHost host;
624
4843
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
625
4843
  if (host.ParsingFailed())
626
489
    return false;
627
4354
  *output = host.ToStringMove();
628
4354
  return true;
629
}
630
631
8909
std::vector<std::string> FromJSStringArray(Environment* env,
632
                                           Local<Array> array) {
633
8909
  std::vector<std::string> vec;
634
8909
  if (array->Length() > 0)
635
8893
    vec.reserve(array->Length());
636
135162
  for (size_t n = 0; n < array->Length(); n++) {
637
117344
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
638
117344
    if (val->IsString()) {
639
58672
      Utf8Value value(env->isolate(), val.As<String>());
640
58672
      vec.emplace_back(*value, value.length());
641
    }
642
  }
643
8909
  return vec;
644
}
645
646
8909
// Builds a url_data from the properties of the JS object `base_obj`.
// Properties are read in this order: flags, port, scheme, username, password,
// host, query, fragment, path.
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  // Fetches one property of `base_obj`.
  const auto read_property = [&](Local<String> key) -> Local<Value> {
    return base_obj->Get(env->context(), key).ToLocalChecked();
  };

  Local<Value> flags = read_property(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = read_property(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  Local<Value> scheme = read_property(env->scheme_string());
  base.scheme = Utf8Value(env->isolate(), scheme).out();

  // Copies a string property into the given url_data member. The matching
  // flag is set when the string is non-empty, or unconditionally for string
  // values when `empty_as_present` is true. Non-string values are ignored.
  const auto copy_string_property = [&](std::string url_data::*member,
                                        int flag,
                                        Local<String> name,
                                        bool empty_as_present) {
    Local<Value> value = read_property(name);
    if (!value->IsString())
      return;
    Utf8Value utf8value(env->isolate(), value.As<String>());
    (base.*member).assign(*utf8value, utf8value.length());
    if (empty_as_present || value.As<String>()->Length() != 0)
      base.flags |= flag;
  };
  copy_string_property(&url_data::username,
                       URL_FLAGS_HAS_USERNAME,
                       env->username_string(),
                       false);
  copy_string_property(&url_data::password,
                       URL_FLAGS_HAS_PASSWORD,
                       env->password_string(),
                       false);
  copy_string_property(
      &url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
  copy_string_property(
      &url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
  copy_string_property(&url_data::fragment,
                       URL_FLAGS_HAS_FRAGMENT,
                       env->fragment_string(),
                       true);

  Local<Value> path = read_property(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
700
701
41703
// Builds a url_data from the JS object `context_obj`. Only the flag bits in
// kCopyFlagsMask are carried over. Username and password are read only when
// their flag bit is set, and are then required to be strings.
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;

  // Fetches one property of `context_obj`.
  const auto read_property = [&](Local<String> key) -> Local<Value> {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  // Stores the UTF-8 form of `value` in `*target`.
  const auto assign_utf8 = [&](std::string* target, Local<Value> value) {
    Utf8Value utf8(env->isolate(), value);
    target->assign(*utf8, utf8.length());
  };

  Local<Value> flags = read_property(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = read_property(env->scheme_string());
  if (scheme->IsString())
    assign_utf8(&context.scheme, scheme);

  Local<Value> port = read_property(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = read_property(env->username_string());
    CHECK(username->IsString());
    assign_utf8(&context.username, username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = read_property(env->password_string());
    CHECK(password->IsString());
    assign_utf8(&context.password, password);
  }

  Local<Value> host = read_property(env->host_string());
  if (host->IsString())
    assign_utf8(&context.host, host);

  return context;
}
749
750
// Single dot segment can be ".", "%2e", or "%2E"
751
2441351
bool IsSingleDotSegment(const std::string& str) {
752
2441351
  switch (str.size()) {
753
7005
    case 1:
754
7005
      return str == ".";
755
145348
    case 3:
756
145348
      return str[0] == '%' &&
757

145394
             str[1] == '2' &&
758
145394
             ASCIILowercase(str[2]) == 'e';
759
2288998
    default:
760
2288998
      return false;
761
  }
762
}
763
764
// Double dot segment can be:
765
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
766
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
767
1225568
bool IsDoubleDotSegment(const std::string& str) {
768

1225568
  switch (str.size()) {
769
5096
    case 2:
770
5096
      return str == "..";
771
346720
    case 4:
772

346720
      if (str[0] != '.' && str[0] != '%')
773
346701
        return false;
774
19
      return ((str[0] == '.' &&
775
13
               str[1] == '%' &&
776

8
               str[2] == '2' &&
777
42
               ASCIILowercase(str[3]) == 'e') ||
778
15
              (str[0] == '%' &&
779

12
               str[1] == '2' &&
780
6
               ASCIILowercase(str[2]) == 'e' &&
781
25
               str[3] == '.'));
782
69716
    case 6:
783
69716
      return (str[0] == '%' &&
784

24
              str[1] == '2' &&
785
12
              ASCIILowercase(str[2]) == 'e' &&
786
4
              str[3] == '%' &&
787

69732
              str[4] == '2' &&
788
69720
              ASCIILowercase(str[5]) == 'e');
789
804036
    default:
790
804036
      return false;
791
  }
792
}
793
794
11328
void ShortenUrlPath(struct url_data* url) {
795
11328
  if (url->path.empty()) return;
796


11527
  if (url->path.size() == 1 && url->scheme == "file:" &&
797
600
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
798
10927
  url->path.pop_back();
799
}
800
801
}  // anonymous namespace
802
803
221345
// State-machine parser that walks `input` one character at a time and
// records the result in `*url`.
//
//   input, len      Text to parse. ASCII tabs/newlines (IsASCIITabOrNewline)
//                   are filtered out before parsing; when `has_url` is false,
//                   leading and trailing characters matching
//                   IsC0ControlOrSpace are trimmed as well.
//   state_override  Starting state. kUnknownState means "no override", in
//                   which case parsing begins at kSchemeStart. Any other
//                   value outside [kSchemeStart, kFragment] sets
//                   URL_FLAGS_INVALID_PARSE_STATE and returns immediately.
//   url             Receives the parsed components and flags; its existing
//                   flags, scheme, port, etc. are also read while parsing.
//   has_url         When true the input is not trimmed.
//   base, has_base  Base URL consulted for input that has no scheme of its
//                   own.
//                   NOTE(review): kRelative and kRelativeSlash read `base`
//                   without testing `has_base`; presumably callers never
//                   enter those states without a base -- confirm.
//
// Nothing is returned; failure is reported through url->flags
// (URL_FLAGS_FAILED, URL_FLAGS_TERMINATED, URL_FLAGS_INVALID_PARSE_STATE).
void URL::Parse(const char* input,
804
                size_t len,
805
                enum url_parse_state state_override,
806
                struct url_data* url,
807
                bool has_url,
808
                const struct url_data* base,
809
                bool has_base) {
810
221345
  const char* p = input;
811
221345
  const char* end = input + len;
812
813
221345
  // Narrow [p, end) past leading and trailing C0-control/space characters.
  if (!has_url) {
814
145724
    for (const char* ptr = p; ptr < end; ptr++) {
815
145705
      if (IsC0ControlOrSpace(*ptr))
816
56
        p++;
817
      else
818
145649
        break;
819
    }
820
145716
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
821
145697
      if (IsC0ControlOrSpace(*ptr))
822
48
        end--;
823
      else
824
145649
        break;
825
    }
826
145668
    input = p;
827
145668
    len = end - p;
828
  }
829
830
  // The spec says we should strip out any ASCII tabs or newlines.
831
  // In those cases, we create another std::string instance with the filtered
832
  // contents, but in the general case we avoid the overhead.
833
221345
  std::string whitespace_stripped;
834
14017646
  for (const char* ptr = p; ptr < end; ptr++) {
835
13796471
    if (!IsASCIITabOrNewline(*ptr))
836
13796301
      continue;
837
    // Hit tab or newline. Allocate storage, copy what we have until now,
838
    // and then iterate and filter all similar characters out.
839
170
    whitespace_stripped.reserve(len - 1);
840
170
    whitespace_stripped.assign(p, ptr - p);
841
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
842
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
843
853
      if (!IsASCIITabOrNewline(*ptr))
844
769
        whitespace_stripped += *ptr;
845
    }
846
847
    // Update variables like they should have looked like if the string
848
    // had been stripped of whitespace to begin with.
849
170
    input = whitespace_stripped.c_str();
850
170
    len = whitespace_stripped.size();
851
170
    p = input;
852
170
    end = input + len;
853
170
    break;
854
  }
855
856
221345
  bool atflag = false;  // Set when @ has been seen.
857
221345
  bool square_bracket_flag = false;  // Set inside of [...]
858
221345
  bool password_token_seen_flag = false;  // Set after a : after an username.
859
860
221345
  std::string buffer;
861
862
  // Set the initial parse state.
863
221345
  const bool has_state_override = state_override != kUnknownState;
864
221345
  enum url_parse_state state = has_state_override ? state_override :
865
                                                    kSchemeStart;
866
867

221345
  if (state < kSchemeStart || state > kFragment) {
868
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
869
    return;
870
  }
871
872
14531412
  // The loop runs one step past the last character so that every state gets
  // to see kEOL. Inside the switch, `break` falls through to the p++ at the
  // bottom, while `continue` re-dispatches the same character in the
  // (usually new) state.
  while (p <= end) {
873
14318887
    const char ch = p < end ? p[0] : kEOL;
874
14318887
    bool special = (url->flags & URL_FLAGS_SPECIAL);
875
    bool cannot_be_base;
876

14318887
    bool special_back_slash = (special && ch == '\\');
877
878





14318887
    switch (state) {
879
145751
      case kSchemeStart:
880
145751
        if (IsASCIIAlpha(ch)) {
881
132690
          buffer += ASCIILowercase(ch);
882
132690
          state = kScheme;
883
13061
        } else if (!has_state_override) {
884
13051
          state = kNoScheme;
885
13051
          continue;
886
        } else {
887
10
          url->flags |= URL_FLAGS_FAILED;
888
10
          return;
889
        }
890
132690
        break;
891
549619
      case kScheme:
892


549619
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
893
416929
          buffer += ASCIILowercase(ch);
894

132690
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
895

130668
          if (has_state_override && buffer.size() == 0) {
896
            url->flags |= URL_FLAGS_TERMINATED;
897
            return;
898
          }
899
130668
          buffer += ':';
900
901
130668
          bool new_is_special = IsSpecial(buffer);
902
903
130668
          if (has_state_override) {
904
45
            if ((special != new_is_special) ||
905
45
                ((buffer == "file:") &&
906
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
907
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
908


116
                  (url->port != -1))) ||
909
45
                  (url->scheme == "file:" && url->host.empty())) {
910
32
              url->flags |= URL_FLAGS_TERMINATED;
911
32
              return;
912
            }
913
          }
914
915
130636
          url->scheme = std::move(buffer);
916
130636
          url->port = NormalizePort(url->scheme, url->port);
917
130636
          if (new_is_special) {
918
124805
            url->flags |= URL_FLAGS_SPECIAL;
919
124805
            special = true;
920
          } else {
921
5831
            url->flags &= ~URL_FLAGS_SPECIAL;
922
5831
            special = false;
923
          }
924
          // `special_back_slash` equals to `(special && ch == '\\')` and `ch`
925
          // here always not equals to `\\`. So `special_back_slash` here always
926
          // equals to `false`.
927
130636
          special_back_slash = false;
928
130636
          buffer.clear();
929
130636
          if (has_state_override)
930
33
            return;
931
130603
          if (url->scheme == "file:") {
932
120859
            state = kFile;
933
3929
          } else if (special &&
934

13673
                     has_base &&
935
1039
                     url->scheme == base->scheme) {
936
329
            state = kSpecialRelativeOrAuthority;
937
9415
          } else if (special) {
938
3600
            state = kSpecialAuthoritySlashes;
939

5815
          } else if (p + 1 < end && p[1] == '/') {
940
716
            state = kPathOrAuthority;
941
716
            p++;
942
          } else {
943
5099
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
944
5099
            url->flags |= URL_FLAGS_HAS_PATH;
945
5099
            url->path.emplace_back("");
946
5099
            state = kCannotBeBase;
947
130603
          }
948
2022
        } else if (!has_state_override) {
949
2014
          buffer.clear();
950
2014
          state = kNoScheme;
951
2014
          // What was read so far is not a scheme: start over from the first
          // character in kNoScheme.
          p = input;
952
2014
          continue;
953
        } else {
954
8
          url->flags |= URL_FLAGS_FAILED;
955
8
          return;
956
        }
957
547532
        break;
958
15065
      case kNoScheme:
959

15065
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
960

15065
        if (!has_base || (cannot_be_base && ch != '#')) {
961
7836
          url->flags |= URL_FLAGS_FAILED;
962
7836
          return;
963

7229
        } else if (cannot_be_base && ch == '#') {
964
28
          url->scheme = base->scheme;
965
28
          if (IsSpecial(url->scheme)) {
966
            url->flags |= URL_FLAGS_SPECIAL;
967
            special = true;
968
          } else {
969
28
            url->flags &= ~URL_FLAGS_SPECIAL;
970
28
            special = false;
971
          }
972

28
          special_back_slash = (special && ch == '\\');
973
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
974
28
            url->flags |= URL_FLAGS_HAS_PATH;
975
28
            url->path = base->path;
976
          }
977
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
978
4
            url->flags |= URL_FLAGS_HAS_QUERY;
979
4
            url->query = base->query;
980
          }
981
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
982
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
983
            url->fragment = base->fragment;
984
          }
985
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
986
28
          state = kFragment;
987

14402
        } else if (has_base &&
988
7201
                   base->scheme != "file:") {
989
413
          state = kRelative;
990
413
          continue;
991
        } else {
992
6788
          url->scheme = "file:";
993
6788
          url->flags |= URL_FLAGS_SPECIAL;
994
6788
          special = true;
995
6788
          state = kFile;
996

6788
          special_back_slash = (special && ch == '\\');
997
6788
          continue;
998
        }
999
28
        break;
1000
329
      case kSpecialRelativeOrAuthority:
1001

329
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1002
297
          state = kSpecialAuthorityIgnoreSlashes;
1003
297
          p++;
1004
        } else {
1005
32
          state = kRelative;
1006
32
          continue;
1007
        }
1008
297
        break;
1009
716
      case kPathOrAuthority:
1010
716
        if (ch == '/') {
1011
548
          state = kAuthority;
1012
        } else {
1013
168
          state = kPath;
1014
168
          continue;
1015
        }
1016
548
        break;
1017
445
      case kRelative:
1018
445
        url->scheme = base->scheme;
1019
445
        if (IsSpecial(url->scheme)) {
1020
345
          url->flags |= URL_FLAGS_SPECIAL;
1021
345
          special = true;
1022
        } else {
1023
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1024
100
          special = false;
1025
        }
1026

445
        special_back_slash = (special && ch == '\\');
1027

445
        switch (ch) {
1028
18
          case kEOL:
1029
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1030
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1031
4
              url->username = base->username;
1032
            }
1033
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1034
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1035
4
              url->password = base->password;
1036
            }
1037
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1038
16
              url->flags |= URL_FLAGS_HAS_HOST;
1039
16
              url->host = base->host;
1040
            }
1041
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1042
              url->flags |= URL_FLAGS_HAS_QUERY;
1043
              url->query = base->query;
1044
            }
1045
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1046
18
              url->flags |= URL_FLAGS_HAS_PATH;
1047
18
              url->path = base->path;
1048
            }
1049
18
            url->port = base->port;
1050
18
            break;
1051
154
          case '/':
1052
154
            state = kRelativeSlash;
1053
154
            break;
1054
38
          case '?':
1055
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1056
              url->flags |= URL_FLAGS_HAS_USERNAME;
1057
              url->username = base->username;
1058
            }
1059
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1060
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1061
              url->password = base->password;
1062
            }
1063
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1064
34
              url->flags |= URL_FLAGS_HAS_HOST;
1065
34
              url->host = base->host;
1066
            }
1067
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1068
38
              url->flags |= URL_FLAGS_HAS_PATH;
1069
38
              url->path = base->path;
1070
            }
1071
38
            url->port = base->port;
1072
38
            state = kQuery;
1073
38
            break;
1074
38
          case '#':
1075
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1076
              url->flags |= URL_FLAGS_HAS_USERNAME;
1077
              url->username = base->username;
1078
            }
1079
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1080
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1081
              url->password = base->password;
1082
            }
1083
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1084
34
              url->flags |= URL_FLAGS_HAS_HOST;
1085
34
              url->host = base->host;
1086
            }
1087
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1088
              url->flags |= URL_FLAGS_HAS_QUERY;
1089
              url->query = base->query;
1090
            }
1091
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1092
38
              url->flags |= URL_FLAGS_HAS_PATH;
1093
38
              url->path = base->path;
1094
            }
1095
38
            url->port = base->port;
1096
38
            state = kFragment;
1097
38
            break;
1098
197
          default:
1099
197
            if (special_back_slash) {
1100
18
              state = kRelativeSlash;
1101
            } else {
1102
179
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1103
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1104
1
                url->username = base->username;
1105
              }
1106
179
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1107
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1108
1
                url->password = base->password;
1109
              }
1110
179
              if (base->flags & URL_FLAGS_HAS_HOST) {
1111
159
                url->flags |= URL_FLAGS_HAS_HOST;
1112
159
                url->host = base->host;
1113
              }
1114
179
              if (base->flags & URL_FLAGS_HAS_PATH) {
1115
179
                url->flags |= URL_FLAGS_HAS_PATH;
1116
179
                url->path = base->path;
1117
179
                ShortenUrlPath(url);
1118
              }
1119
179
              url->port = base->port;
1120
179
              state = kPath;
1121
179
              continue;
1122
            }
1123
        }
1124
266
        break;
1125
172
      case kRelativeSlash:
1126


172
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1127
22
          state = kSpecialAuthorityIgnoreSlashes;
1128
150
        } else if (ch == '/') {
1129
6
          state = kAuthority;
1130
        } else {
1131
144
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1132
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1133
8
            url->username = base->username;
1134
          }
1135
144
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1136
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1137
4
            url->password = base->password;
1138
          }
1139
144
          if (base->flags & URL_FLAGS_HAS_HOST) {
1140
136
            url->flags |= URL_FLAGS_HAS_HOST;
1141
136
            url->host = base->host;
1142
          }
1143
144
          url->port = base->port;
1144
144
          state = kPath;
1145
144
          continue;
1146
        }
1147
28
        break;
1148
3600
      case kSpecialAuthoritySlashes:
1149
3600
        state = kSpecialAuthorityIgnoreSlashes;
1150

3600
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1151
3451
          p++;
1152
        } else {
1153
149
          continue;
1154
        }
1155
3451
        break;
1156
3996
      case kSpecialAuthorityIgnoreSlashes:
1157

3996
        if (ch != '/' && ch != '\\') {
1158
3919
          state = kAuthority;
1159
3919
          continue;
1160
        }
1161
77
        break;
1162
94311
      case kAuthority:
1163
94311
        if (ch == '@') {
1164
565
          if (atflag) {
1165
41
            buffer.reserve(buffer.size() + 3);
1166
41
            buffer.insert(0, "%40");
1167
          }
1168
565
          atflag = true;
1169
565
          size_t blen = buffer.size();
1170

565
          if (blen > 0 && buffer[0] != ':') {
1171
469
            url->flags |= URL_FLAGS_HAS_USERNAME;
1172
          }
1173
6652
          for (size_t n = 0; n < blen; n++) {
1174
6087
            const char bch = buffer[n];
1175
6087
            if (bch == ':') {
1176
444
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1177
444
              if (!password_token_seen_flag) {
1178
428
                password_token_seen_flag = true;
1179
428
                continue;
1180
              }
1181
            }
1182
5659
            if (password_token_seen_flag) {
1183
2722
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1184
            } else {
1185
2937
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1186
            }
1187
          }
1188
565
          buffer.clear();
1189

93746
        } else if (ch == kEOL ||
1190
89343
                   ch == '/' ||
1191
89311
                   ch == '?' ||
1192
89293
                   ch == '#' ||
1193
                   special_back_slash) {
1194

4473
          if (atflag && buffer.size() == 0) {
1195
52
            url->flags |= URL_FLAGS_FAILED;
1196
52
            return;
1197
          }
1198
4421
          // Step back over the buffered characters. Together with the p++ at
          // the bottom of the loop this leaves p on the first buffered
          // character, which kHost then reads again.
          p -= buffer.size() + 1;
1199
4421
          buffer.clear();
1200
4421
          state = kHost;
1201
        } else {
1202
89273
          buffer += ch;
1203
        }
1204
94259
        break;
1205
85572
      case kHost:
1206
      case kHostname:
1207

85572
        if (has_state_override && url->scheme == "file:") {
1208
12
          state = kFileHost;
1209
12
          continue;
1210

85560
        } else if (ch == ':' && !square_bracket_flag) {
1211
1621
          if (buffer.size() == 0) {
1212
24
            url->flags |= URL_FLAGS_FAILED;
1213
24
            return;
1214
          }
1215
1597
          if (state_override == kHostname) {
1216
4
            return;
1217
          }
1218
1593
          url->flags |= URL_FLAGS_HAS_HOST;
1219
1593
          if (!ParseHost(buffer, &url->host, special)) {
1220
5
            url->flags |= URL_FLAGS_FAILED;
1221
5
            return;
1222
          }
1223
1588
          buffer.clear();
1224
1588
          state = kPort;
1225

83939
        } else if (ch == kEOL ||
1226
80849
                   ch == '/' ||
1227
80809
                   ch == '?' ||
1228
80783
                   ch == '#' ||
1229
                   special_back_slash) {
1230
3180
          p--;
1231

3180
          if (special && buffer.size() == 0) {
1232
21
            url->flags |= URL_FLAGS_FAILED;
1233
21
            return;
1234
          }
1235
331
          if (has_state_override &&
1236

3528
              buffer.size() == 0 &&
1237
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1238
38
               url->port != -1)) {
1239
8
            url->flags |= URL_FLAGS_TERMINATED;
1240
8
            return;
1241
          }
1242
3151
          url->flags |= URL_FLAGS_HAS_HOST;
1243
3151
          if (!ParseHost(buffer, &url->host, special)) {
1244
432
            url->flags |= URL_FLAGS_FAILED;
1245
432
            return;
1246
          }
1247
2719
          buffer.clear();
1248
2719
          state = kPathStart;
1249
2719
          if (has_state_override) {
1250
227
            return;
1251
          }
1252
        } else {
1253
80759
          if (ch == '[')
1254
359
            square_bracket_flag = true;
1255
80759
          if (ch == ']')
1256
355
            square_bracket_flag = false;
1257
80759
          buffer += ch;
1258
        }
1259
84839
        break;
1260
9173
      case kPort:
1261
9173
        if (IsASCIIDigit(ch)) {
1262
7516
          buffer += ch;
1263

1657
        } else if (has_state_override ||
1264
1139
                   ch == kEOL ||
1265
36
                   ch == '/' ||
1266
36
                   ch == '?' ||
1267
36
                   ch == '#' ||
1268
                   special_back_slash) {
1269
1621
          if (buffer.size() > 0) {
1270
1607
            unsigned port = 0;
1271
            // the condition port <= 0xffff prevents integer overflow
1272

8907
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1273
7300
              port = port * 10 + buffer[i] - '0';
1274
1607
            if (port > 0xffff) {
1275
              // TODO(TimothyGu): This hack is currently needed for the host
1276
              // setter since it needs access to hostname if it is valid, and
1277
              // if the FAILED flag is set the entire response to JS layer
1278
              // will be empty.
1279
26
              if (state_override == kHost)
1280
2
                url->port = -1;
1281
              else
1282
24
                url->flags |= URL_FLAGS_FAILED;
1283
26
              return;
1284
            }
1285
            // the port is valid
1286
1581
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1287
1581
            if (url->port == -1)
1288
47
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1289
1581
            buffer.clear();
1290
14
          } else if (has_state_override) {
1291
            // TODO(TimothyGu): Similar case as above.
1292
6
            if (state_override == kHost)
1293
2
              url->port = -1;
1294
            else
1295
4
              url->flags |= URL_FLAGS_TERMINATED;
1296
6
            return;
1297
          }
1298
1589
          state = kPathStart;
1299
1589
          continue;
1300
        } else {
1301
36
          url->flags |= URL_FLAGS_FAILED;
1302
36
          return;
1303
        }
1304
7516
        break;
1305
127647
      case kFile:
1306
127647
        url->scheme = "file:";
1307
127647
        url->host.clear();
1308
127647
        url->flags |= URL_FLAGS_HAS_HOST;
1309

127647
        if (ch == '/' || ch == '\\') {
1310
120977
          state = kFileSlash;
1311

6670
        } else if (has_base && base->scheme == "file:") {
1312

6651
          switch (ch) {
1313
4
            case kEOL:
1314
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1315
4
                url->host = base->host;
1316
              }
1317
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1318
4
                url->flags |= URL_FLAGS_HAS_PATH;
1319
4
                url->path = base->path;
1320
              }
1321
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1322
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1323
4
                url->query = base->query;
1324
              }
1325
4
              break;
1326
4
            case '?':
1327
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1328
4
                url->host = base->host;
1329
              }
1330
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1331
4
                url->flags |= URL_FLAGS_HAS_PATH;
1332
4
                url->path = base->path;
1333
              }
1334
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1335
4
              url->query.clear();
1336
4
              state = kQuery;
1337
4
              break;
1338
4
            case '#':
1339
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1340
4
                url->host = base->host;
1341
              }
1342
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1343
4
                url->flags |= URL_FLAGS_HAS_PATH;
1344
4
                url->path = base->path;
1345
              }
1346
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1347
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1348
4
                url->query = base->query;
1349
              }
1350
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1351
4
              url->fragment.clear();
1352
4
              state = kFragment;
1353
4
              break;
1354
6639
            default:
1355
6639
              url->query.clear();
1356
6639
              if (base->flags & URL_FLAGS_HAS_HOST) {
1357
6639
                url->host = base->host;
1358
              }
1359
6639
              if (base->flags & URL_FLAGS_HAS_PATH) {
1360
6639
                url->flags |= URL_FLAGS_HAS_PATH;
1361
6639
                url->path = base->path;
1362
              }
1363
6639
              if (!StartsWithWindowsDriveLetter(p, end)) {
1364
6615
                ShortenUrlPath(url);
1365
              } else {
1366
24
                url->path.clear();
1367
              }
1368
6639
              state = kPath;
1369
6639
              continue;
1370
          }
1371
        } else {
1372
19
          state = kPath;
1373
19
          continue;
1374
        }
1375
120989
        break;
1376
120977
      case kFileSlash:
1377

120977
        if (ch == '/' || ch == '\\') {
1378
120815
          state = kFileHost;
1379
        } else {
1380

162
          if (has_base && base->scheme == "file:") {
1381
128
            url->flags |= URL_FLAGS_HAS_HOST;
1382
128
            url->host = base->host;
1383

242
            if (!StartsWithWindowsDriveLetter(p, end) &&
1384
114
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1385
4
              url->flags |= URL_FLAGS_HAS_PATH;
1386
4
              url->path.push_back(base->path[0]);
1387
            }
1388
          }
1389
162
          state = kPath;
1390
162
          continue;
1391
        }
1392
120815
        break;
1393
121932
      case kFileHost:
1394

121932
        if (ch == kEOL ||
1395
1115
            ch == '/' ||
1396
1105
            ch == '\\' ||
1397
1105
            ch == '?' ||
1398
            ch == '#') {
1399
120815
          if (!has_state_override &&
1400

241642
              buffer.size() == 2 &&
1401
22
              IsWindowsDriveLetter(buffer)) {
1402
12
            state = kPath;
1403
120815
          } else if (buffer.size() == 0) {
1404
120622
            url->flags |= URL_FLAGS_HAS_HOST;
1405
120622
            url->host.clear();
1406
120622
            if (has_state_override)
1407
4
              return;
1408
120618
            state = kPathStart;
1409
          } else {
1410
193
            std::string host;
1411
193
            if (!ParseHost(buffer, &host, special)) {
1412
52
              url->flags |= URL_FLAGS_FAILED;
1413
52
              return;
1414
            }
1415
141
            if (host == "localhost")
1416
37
              host.clear();
1417
141
            url->flags |= URL_FLAGS_HAS_HOST;
1418
141
            url->host = host;
1419
141
            if (has_state_override)
1420
4
              return;
1421
137
            buffer.clear();
1422
137
            state = kPathStart;
1423
          }
1424
120767
          continue;
1425
        } else {
1426
1105
          buffer += ch;
1427
        }
1428
1105
        break;
1429
199736
      case kPathStart:
1430
199736
        if (IsSpecial(url->scheme)) {
1431
199188
          state = kPath;
1432

199188
          if (ch != '/' && ch != '\\') {
1433
75633
            continue;
1434
          }
1435

548
        } else if (!has_state_override && ch == '?') {
1436
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1437
6
          url->query.clear();
1438
6
          state = kQuery;
1439

542
        } else if (!has_state_override && ch == '#') {
1440
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1441
6
          url->fragment.clear();
1442
6
          state = kFragment;
1443
536
        } else if (ch != kEOL) {
1444
459
          state = kPath;
1445
459
          if (ch != '/') {
1446
35
            continue;
1447
          }
1448

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1449
2
          url->flags |= URL_FLAGS_HAS_PATH;
1450
2
          url->path.emplace_back("");
1451
        }
1452
124068
        break;
1453
12724310
      case kPath:
1454

12724310
        if (ch == kEOL ||
1455
11499831
            ch == '/' ||
1456
11499761
            special_back_slash ||
1457

11499761
            (!has_state_override && (ch == '?' || ch == '#'))) {
1458
1225568
          if (IsDoubleDotSegment(buffer)) {
1459
4534
            ShortenUrlPath(url);
1460

4534
            if (ch != '/' && !special_back_slash) {
1461
280
              url->flags |= URL_FLAGS_HAS_PATH;
1462
280
              url->path.emplace_back("");
1463
            }
1464
1224167
          } else if (IsSingleDotSegment(buffer) &&
1465

1224167
                     ch != '/' && !special_back_slash) {
1466
717
            url->flags |= URL_FLAGS_HAS_PATH;
1467
717
            url->path.emplace_back("");
1468
1220317
          } else if (!IsSingleDotSegment(buffer)) {
1469
2429876
            if (url->scheme == "file:" &&
1470
1374249
                url->path.empty() &&
1471

2592150
                buffer.size() == 2 &&
1472
100
                IsWindowsDriveLetter(buffer)) {
1473
98
              buffer[1] = ':';
1474
            }
1475
1217901
            url->flags |= URL_FLAGS_HAS_PATH;
1476
1217901
            url->path.emplace_back(std::move(buffer));
1477
          }
1478
1225568
          buffer.clear();
1479
2451136
          if (ch == '?') {
1480
964
            url->flags |= URL_FLAGS_HAS_QUERY;
1481
964
            url->query.clear();
1482
964
            state = kQuery;
1483
1224604
          } else if (ch == '#') {
1484
55
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1485
55
            url->fragment.clear();
1486
55
            state = kFragment;
1487
          }
1488
        } else {
1489
11498742
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1490
        }
1491
12724310
        break;
1492
49667
      case kCannotBeBase:
1493
49667
        switch (ch) {
1494
4
          case '?':
1495
4
            state = kQuery;
1496
4
            break;
1497
10
          case '#':
1498
10
            state = kFragment;
1499
10
            break;
1500
49653
          default:
1501
49653
            if (url->path.empty())
1502
              url->path.emplace_back("");
1503
49653
            else if (ch != kEOL)
1504
44568
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1505
        }
1506
49667
        break;
1507
61614
      case kQuery:
1508

61614
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1509
1175
          url->flags |= URL_FLAGS_HAS_QUERY;
1510
1175
          url->query = std::move(buffer);
1511
1175
          buffer.clear();
1512
1564
          if (ch == '#')
1513
389
            state = kFragment;
1514
        } else {
1515
60439
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1516
                                                QUERY_ENCODE_SET_NONSPECIAL);
1517
        }
1518
61614
        break;
1519
4255
      case kFragment:
1520
4255
        switch (ch) {
1521
604
          case kEOL:
1522
604
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1523
604
            url->fragment = std::move(buffer);
1524
604
            break;
1525
3651
          default:
1526
3651
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1527
        }
1528
4255
        break;
1529
      default:
1530
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1531
        return;
1532
    }
1533
1534
14078354
    // Reached only via `break` out of the switch: move on to the next
    // character.
    p++;
1535
  }
1536
}  // NOLINT(readability/fn_size)
1537
1538
// https://url.spec.whatwg.org/#url-serializing
1539
33974
std::string URL::SerializeURL(const url_data& url,
1540
                              bool exclude = false) {
1541
33974
  std::string output;
1542
33974
  output.reserve(
1543
    10 +  // We generally insert < 10 separator characters between URL parts
1544
33974
    url.scheme.size() +
1545
33974
    url.username.size() +
1546
33974
    url.password.size() +
1547
33974
    url.host.size() +
1548
33974
    url.query.size() +
1549
33974
    url.fragment.size() +
1550
33974
    url.href.size() +
1551
33974
    std::accumulate(
1552
        url.path.begin(),
1553
        url.path.end(),
1554
        0,
1555
358296
        [](size_t sum, const auto& str) { return sum + str.size(); }));
1556
1557
33974
  output += url.scheme;
1558
33974
  if (url.flags & URL_FLAGS_HAS_HOST) {
1559
33974
    output += "//";
1560
33974
    if (url.flags & URL_FLAGS_HAS_USERNAME ||
1561
33974
        url.flags & URL_FLAGS_HAS_PASSWORD) {
1562
      if (url.flags & URL_FLAGS_HAS_USERNAME) {
1563
        output += url.username;
1564
      }
1565
      if (url.flags & URL_FLAGS_HAS_PASSWORD) {
1566
        output += ":" + url.password;
1567
      }
1568
      output += "@";
1569
    }
1570
33974
    output += url.host;
1571
33974
    if (url.port != -1) {
1572
      output += ":" + std::to_string(url.port);
1573
    }
1574
  }
1575
33974
  if (url.flags & URL_FLAGS_CANNOT_BE_BASE) {
1576
    output += url.path[0];
1577
  } else {
1578
    if (!(url.flags & URL_FLAGS_HAS_HOST) &&
1579

33974
          url.path.size() > 1 &&
1580
          url.path[0].empty()) {
1581
      output += "/.";
1582
    }
1583
358296
    for (size_t i = 1; i < url.path.size(); i++) {
1584
324322
      output += "/" + url.path[i];
1585
    }
1586
  }
1587
33974
  if (url.flags & URL_FLAGS_HAS_QUERY) {
1588
    output += "?" + url.query;
1589
  }
1590

33974
  if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) {
1591
    output += "#" + url.fragment;
1592
  }
1593
33974
  output.shrink_to_fit();
1594
33974
  return output;
1595
}
1596
1597
namespace {
1598
144828
void SetArgs(Environment* env,
1599
             Local<Value> argv[ARG_COUNT],
1600
             const struct url_data& url) {
1601
144828
  Isolate* isolate = env->isolate();
1602
144828
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1603
289656
  argv[ARG_PROTOCOL] =
1604
144828
      url.flags & URL_FLAGS_SPECIAL ?
1605
138772
          GetSpecial(env, url.scheme) :
1606
6056
          OneByteString(isolate, url.scheme.c_str());
1607
144828
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1608
1224
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1609
144828
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1610
1184
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1611
144828
  if (url.flags & URL_FLAGS_HAS_HOST)
1612
278936
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1613
144828
  if (url.flags & URL_FLAGS_HAS_QUERY)
1614
2366
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1615
144828
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1616
1200
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1617
144828
  if (url.port > -1)
1618
3442
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1619
144828
  if (url.flags & URL_FLAGS_HAS_PATH)
1620
288464
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1621
144828
}
1622
1623
153368
void Parse(Environment* env,
1624
           Local<Value> recv,
1625
           const char* input,
1626
           size_t len,
1627
           enum url_parse_state state_override,
1628
           Local<Value> base_obj,
1629
           Local<Value> context_obj,
1630
           Local<Function> cb,
1631
           Local<Value> error_cb) {
1632
153368
  Isolate* isolate = env->isolate();
1633
153368
  Local<Context> context = env->context();
1634
153368
  HandleScope handle_scope(isolate);
1635
153368
  Context::Scope context_scope(context);
1636
1637
153368
  const bool has_context = context_obj->IsObject();
1638
153368
  const bool has_base = base_obj->IsObject();
1639
1640
153368
  url_data base;
1641
153368
  url_data url;
1642
153368
  if (has_context)
1643
41703
    url = HarvestContext(env, context_obj.As<Object>());
1644
153368
  if (has_base)
1645
8909
    base = HarvestBase(env, base_obj.As<Object>());
1646
1647
153368
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1648

153368
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1649
41703
      ((state_override != kUnknownState) &&
1650
41703
       (url.flags & URL_FLAGS_TERMINATED)))
1651
44
    return;
1652
1653
  // Define the return value placeholders
1654
153324
  const Local<Value> undef = Undefined(isolate);
1655
153324
  const Local<Value> null = Null(isolate);
1656
153324
  if (!(url.flags & URL_FLAGS_FAILED)) {
1657
    Local<Value> argv[] = {
1658
      undef,
1659
      undef,
1660
      undef,
1661
      undef,
1662
      null,  // host defaults to null
1663
      null,  // port defaults to null
1664
      undef,
1665
      null,  // query defaults to null
1666
      null,  // fragment defaults to null
1667
144828
    };
1668
144828
    SetArgs(env, argv, url);
1669
144828
    USE(cb->Call(context, recv, arraysize(argv), argv));
1670
8496
  } else if (error_cb->IsFunction()) {
1671
16732
    Local<Value> flags = Integer::NewFromUnsigned(isolate, url.flags);
1672
8366
    USE(error_cb.As<Function>()->Call(context, recv, 1, &flags));
1673
  }
1674
}
1675
1676
153368
void Parse(const FunctionCallbackInfo<Value>& args) {
1677
153368
  Environment* env = Environment::GetCurrent(args);
1678
153368
  CHECK_GE(args.Length(), 5);
1679
306736
  CHECK(args[0]->IsString());  // input
1680


416869
  CHECK(args[2]->IsUndefined() ||  // base context
1681
        args[2]->IsNull() ||
1682
        args[2]->IsObject());
1683


431845
  CHECK(args[3]->IsUndefined() ||  // context
1684
        args[3]->IsNull() ||
1685
        args[3]->IsObject());
1686
153368
  CHECK(args[4]->IsFunction());  // complete callback
1687

418401
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1688
1689
153368
  Utf8Value input(env->isolate(), args[0]);
1690
153368
  enum url_parse_state state_override = kUnknownState;
1691
153368
  if (args[1]->IsNumber()) {
1692
153368
    state_override = static_cast<enum url_parse_state>(
1693
306736
        args[1]->Uint32Value(env->context()).FromJust());
1694
  }
1695
1696
306736
  Parse(env, args.This(),
1697
153368
        *input, input.length(),
1698
        state_override,
1699
        args[2],
1700
        args[3],
1701
306736
        args[4].As<Function>(),
1702
        args[5]);
1703
153368
}
1704
1705
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1706
92
  Environment* env = Environment::GetCurrent(args);
1707
92
  CHECK_GE(args.Length(), 1);
1708
184
  CHECK(args[0]->IsString());
1709
184
  Utf8Value value(env->isolate(), args[0]);
1710
92
  std::string output;
1711
92
  size_t len = value.length();
1712
92
  output.reserve(len);
1713
756
  for (size_t n = 0; n < len; n++) {
1714
664
    const char ch = (*value)[n];
1715
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1716
  }
1717
276
  args.GetReturnValue().Set(
1718
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1719
92
}
1720
1721
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1722
229
  Environment* env = Environment::GetCurrent(args);
1723
229
  CHECK_GE(args.Length(), 1);
1724
458
  CHECK(args[0]->IsString());
1725
229
  Utf8Value value(env->isolate(), args[0]);
1726
1727
229
  URLHost host;
1728
  // Assuming the host is used for a special scheme.
1729
229
  host.ParseHost(*value, value.length(), true);
1730
229
  if (host.ParsingFailed()) {
1731
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1732
12
    return;
1733
  }
1734
217
  std::string out = host.ToStringMove();
1735
651
  args.GetReturnValue().Set(
1736
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1737
}
1738
1739
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1740
207
  Environment* env = Environment::GetCurrent(args);
1741
207
  CHECK_GE(args.Length(), 1);
1742
414
  CHECK(args[0]->IsString());
1743
207
  Utf8Value value(env->isolate(), args[0]);
1744
1745
207
  URLHost host;
1746
  // Assuming the host is used for a special scheme.
1747
207
  host.ParseHost(*value, value.length(), true, true);
1748
207
  if (host.ParsingFailed()) {
1749
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1750
12
    return;
1751
  }
1752
195
  std::string out = host.ToStringMove();
1753
585
  args.GetReturnValue().Set(
1754
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1755
}
1756
1757
632
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1758
632
  Environment* env = Environment::GetCurrent(args);
1759
632
  CHECK_EQ(args.Length(), 1);
1760
632
  CHECK(args[0]->IsFunction());
1761
1264
  env->set_url_constructor_function(args[0].As<Function>());
1762
632
}
1763
1764
632
void Initialize(Local<Object> target,
1765
                Local<Value> unused,
1766
                Local<Context> context,
1767
                void* priv) {
1768
632
  Environment* env = Environment::GetCurrent(context);
1769
632
  env->SetMethod(target, "parse", Parse);
1770
632
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1771
632
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1772
632
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1773
632
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1774
1775
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1776
17064
  FLAGS(XX)
1777
#undef XX
1778
1779
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1780
26544
  PARSESTATES(XX)
1781
#undef XX
1782
632
}
1783
}  // namespace
1784
1785
5013
// Registers every native callback exposed by this binding with |registry|,
// in the same order they are installed by Initialize.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  // The explicit callback type selects the FunctionCallbackInfo overload of
  // Parse.
  const v8::FunctionCallback callbacks[] = {
    Parse,
    EncodeAuthSet,
    DomainToASCII,
    DomainToUnicode,
    SetURLConstructor,
  };
  for (const v8::FunctionCallback callback : callbacks) {
    registry->Register(callback);
  }
}
1792
1793
8
std::string URL::ToFilePath() const {
1794
8
  if (context_.scheme != "file:") {
1795
1
    return "";
1796
  }
1797
1798
#ifdef _WIN32
1799
  const char* slash = "\\";
1800
  auto is_slash = [] (char ch) {
1801
    return ch == '/' || ch == '\\';
1802
  };
1803
#else
1804
7
  const char* slash = "/";
1805
46
  auto is_slash = [] (char ch) {
1806
46
    return ch == '/';
1807
  };
1808

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1809
7
      context_.host.length() > 0) {
1810
1
    return "";
1811
  }
1812
#endif
1813
12
  std::string decoded_path;
1814
18
  for (const std::string& part : context_.path) {
1815
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1816
58
    for (char& ch : decoded) {
1817
46
      if (is_slash(ch)) {
1818
1
        return "";
1819
      }
1820
    }
1821
12
    decoded_path += slash + decoded;
1822
  }
1823
1824
#ifdef _WIN32
1825
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1826
1827
  // If hostname is set, then we have a UNC path. Pass the hostname through
1828
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1829
  // need to worry about percent encoding because the URL parser will have
1830
  // already taken care of that for us. Note that this only causes IDNs with an
1831
  // appropriate `xn--` prefix to be decoded.
1832
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1833
      context_.host.length() > 0) {
1834
    std::string unicode_host;
1835
    if (!ToUnicode(context_.host, &unicode_host)) {
1836
      return "";
1837
    }
1838
    return "\\\\" + unicode_host + decoded_path;
1839
  }
1840
  // Otherwise, it's a local path that requires a drive letter.
1841
  if (decoded_path.length() < 3) {
1842
    return "";
1843
  }
1844
  if (decoded_path[2] != ':' ||
1845
      !IsASCIIAlpha(decoded_path[1])) {
1846
    return "";
1847
  }
1848
  // Strip out the leading '\'.
1849
  return decoded_path.substr(1);
1850
#else
1851
5
  return decoded_path;
1852
#endif
1853
}
1854
1855
33974
// Builds a file: URL from |file_path|. Every '%' in the path is written as
// "%25" before the text is handed to the parser, which is started in the
// path-start state on top of a URL constructed from "file://".
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped_file_path;
  for (const char ch : file_path) {
    escaped_file_path += ch;
    if (ch == '%') {
      // Completes the "%25" escape for the '%' just appended.
      escaped_file_path += "25";
    }
  }

  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1867
1868
// This function works by calling out to a JS function that creates and
1869
// returns the JS URL object. Be mindful of the JS<->Native boundary
1870
// crossing that is required.
1871
MaybeLocal<Value> URL::ToObject(Environment* env) const {
1872
  Isolate* isolate = env->isolate();
1873
  Local<Context> context = env->context();
1874
  Context::Scope context_scope(context);
1875
1876
  const Local<Value> undef = Undefined(isolate);
1877
  const Local<Value> null = Null(isolate);
1878
1879
  if (context_.flags & URL_FLAGS_FAILED)
1880
    return Local<Value>();
1881
1882
  Local<Value> argv[] = {
1883
    undef,
1884
    undef,
1885
    undef,
1886
    undef,
1887
    null,  // host defaults to null
1888
    null,  // port defaults to null
1889
    undef,
1890
    null,  // query defaults to null
1891
    null,  // fragment defaults to null
1892
  };
1893
  SetArgs(env, argv, context_);
1894
1895
  MaybeLocal<Value> ret;
1896
  {
1897
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
1898
1899
    // The SetURLConstructor method must have been called already to
1900
    // set the constructor function used below. SetURLConstructor is
1901
    // called automatically when the internal/url.js module is loaded
1902
    // during the internal/bootstrap/node.js processing.
1903
    ret = env->url_constructor_function()
1904
        ->Call(env->context(), undef, arraysize(argv), argv);
1905
  }
1906
1907
  return ret;
1908
}
1909
1910
}  // namespace url
1911
}  // namespace node
1912
1913
5080
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1914
5013
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)