GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1132 1190 95.1 %
Date: 2022-02-22 04:15:11 Branches: 973 1100 88.5 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <numeric>
11
#include <string>
12
#include <vector>
13
14
namespace node {
15
16
using errors::TryCatchScope;
17
18
using url::table_data::hex;
19
using url::table_data::C0_CONTROL_ENCODE_SET;
20
using url::table_data::FRAGMENT_ENCODE_SET;
21
using url::table_data::PATH_ENCODE_SET;
22
using url::table_data::USERINFO_ENCODE_SET;
23
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
24
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
25
26
using v8::Array;
27
using v8::Context;
28
using v8::Function;
29
using v8::FunctionCallbackInfo;
30
using v8::HandleScope;
31
using v8::Int32;
32
using v8::Integer;
33
using v8::Isolate;
34
using v8::Local;
35
using v8::MaybeLocal;
36
using v8::NewStringType;
37
using v8::Null;
38
using v8::Object;
39
using v8::String;
40
using v8::Undefined;
41
using v8::Value;
42
43
161762
// Builds a V8 string from the UTF-8 bytes of `str` (data() and length() are
// passed explicitly). The MaybeLocal returned by V8 is unwrapped with
// ToLocalChecked().
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
49
50
namespace url {
51
namespace {
52
53
// Sentinel character used to represent the URL Standard's "EOF code point":
// https://url.spec.whatwg.org/#eof-code-point
constexpr char kEOL = static_cast<char>(-1);
55
56
// Parsed representation of a URL host:
// https://url.spec.whatwg.org/#concept-host
//
// A freshly constructed URLHost is in the H_FAILED state. The Parse*()
// members expect that state on entry and, on success, switch the object to
// the host type they recognised; ParsingFailed() reports whether the object
// is (still) in the H_FAILED state.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage shared by all host types. `type_` records which member is live.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    // The string member is constructed and destroyed by URLHost itself, so
    // the union's own constructor/destructor never touch it.
    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the live one and returns the object
  // to the H_FAILED state.
  void Reset() {
    const bool holds_string =
        type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
    if (holds_string) {
      using string = std::string;
      value_.domain_or_opaque.~string();
    }
    type_ = HostType::H_FAILED;
  }

  // Assigning to the string member of the union with = is brittle because
  // it relies on the member already being in a state that requires no
  // destruction of old data.
  // For a long time, that worked well enough because ParseIPv6Host() happens
  // to zero-fill `value_`, but that really is relying on standard library
  // internals too much.
  // Resetting first and then constructing the string in place is the easiest
  // solution, but we might want to consider just not forcing strings into a
  // union.
  void EmplaceString(HostType type, std::string&& string) {
    Reset();
    type_ = type;
    new(&value_.domain_or_opaque) std::string(std::move(string));
  }

  void SetOpaque(std::string&& string) {
    EmplaceString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    EmplaceString(HostType::H_DOMAIN, std::move(string));
  }
};

// Releases the string member, if any, through Reset().
URLHost::~URLHost() {
  Reset();
}
132
133
// X-macro listing the enumerators of `url_cb_args`, in declaration order.
#define ARGS(ENTRY)                                                           \
  ENTRY(ARG_FLAGS)                                                            \
  ENTRY(ARG_PROTOCOL)                                                         \
  ENTRY(ARG_USERNAME)                                                         \
  ENTRY(ARG_PASSWORD)                                                         \
  ENTRY(ARG_HOST)                                                             \
  ENTRY(ARG_PORT)                                                             \
  ENTRY(ARG_PATH)                                                             \
  ENTRY(ARG_QUERY)                                                            \
  ENTRY(ARG_FRAGMENT)                                                         \
  ENTRY(ARG_COUNT)  // This one has to be last.

// X-macro listing the enumerators of `url_error_cb_args`, in declaration
// order.
#define ERR_ARGS(ENTRY)                                                       \
  ENTRY(ERR_ARG_FLAGS)                                                        \
  ENTRY(ERR_ARG_INPUT)

// Enumerators are numbered from 0, so ARG_COUNT equals the number of
// entries that precede it.
enum url_cb_args {
#define ENUM_ENTRY(name) name,
  ARGS(ENUM_ENTRY)
#undef ENUM_ENTRY
};

enum url_error_cb_args {
#define ENUM_ENTRY(name) name,
  ERR_ARGS(ENUM_ENTRY)
#undef ENUM_ENTRY
};
160
161
// Generates two overloads of a predicate called `name`:
//   * name(ch1, ch2) returns `expr`, which may refer to `ch1` and `ch2`;
//   * name(str) applies the first overload to the first two characters of
//     `str` and is false for strings with fewer than two characters.
// `bits` is the minimum character width, checked at compile time.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename CharT>                                                   \
  bool name(const CharT ch1, const CharT ch2) {                               \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }                                                                           \
  template <typename CharT>                                                   \
  bool name(const std::basic_string<CharT>& str) {                            \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be wider than " #bits " bits");             \
    if (str.length() < 2)                                                     \
      return false;                                                           \
    return name(str[0], str[1]);                                              \
  }
174
175
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
176

16304185
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
177
178
// https://infra.spec.whatwg.org/#c0-control-or-space
179

330652
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
180
181
// https://infra.spec.whatwg.org/#ascii-digit
182

640238
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))
183
184
// https://infra.spec.whatwg.org/#ascii-hex-digit
185


1078
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
186
                               (ch >= 'A' && ch <= 'F') ||
187
                               (ch >= 'a' && ch <= 'f')))
188
189
// https://infra.spec.whatwg.org/#ascii-alpha
190


1424956
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
191
                            (ch >= 'a' && ch <= 'z')))
192
193
// https://infra.spec.whatwg.org/#ascii-alphanumeric
194

626264
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))
195
196
// https://infra.spec.whatwg.org/#ascii-lowercase
// Returns `ch` with bit 0x20 set when IsASCIIAlpha(ch) holds, and `ch`
// unchanged otherwise.
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}
201
202
// https://url.spec.whatwg.org/#forbidden-host-code-point
203









89198
CHAR_TEST(8, IsForbiddenHostCodePoint,
204
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
205
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
206
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
207
          ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
208
          ch == '^' || ch == '|')
209
210
// https://url.spec.whatwg.org/#windows-drive-letter
211

13048
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
212
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))
213
214
// https://url.spec.whatwg.org/#normalized-windows-drive-letter
215

2624
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
216
                     (IsASCIIAlpha(ch1) && ch2 == ':'))
217
218
#undef TWO_CHAR_STRING_TEST
219
220
13773189
// Reports whether bit number `i` is set in the bitmap `a`. Bit `i` lives in
// byte a[i / 8], at position i % 8 counted from the least significant bit.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i / 8];
  const unsigned mask = 1u << (i % 8);
  return (byte & mask) != 0;
}
223
224
// Appends ch to str. If ch position in encode_set is set, the ch will
225
// be percent-encoded then appended.
226
13773189
void AppendOrEscape(std::string* str,
227
                    const unsigned char ch,
228
                    const uint8_t encode_set[]) {
229
13773189
  if (BitAt(encode_set, ch))
230
1921
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
231
  else
232
13771268
    *str += ch;
233
13773189
}
234
235
// Converts one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to
// its numeric value. Any other character yields static_cast<unsigned>(-1).
template <typename T>
unsigned hex2bin(const T ch) {
  struct DigitRange {
    char first;
    char last;
    unsigned base;  // Value of `first`.
  };
  const DigitRange ranges[] = {
    {'0', '9', 0},
    {'A', 'F', 10},
    {'a', 'f', 10},
  };
  for (const DigitRange& range : ranges) {
    if (ch >= range.first && ch <= range.last)
      return range.base + (ch - range.first);
  }
  return static_cast<unsigned>(-1);
}
245
246
4217
std::string PercentDecode(const char* input, size_t len) {
247
4217
  std::string dest;
248
4217
  if (len == 0)
249
2
    return dest;
250
4215
  dest.reserve(len);
251
4215
  const char* pointer = input;
252
4215
  const char* end = input + len;
253
254
91963
  while (pointer < end) {
255
87748
    const char ch = pointer[0];
256
87748
    size_t remaining = end - pointer - 1;
257


88185
    if (ch != '%' || remaining < 2 ||
258
437
        (ch == '%' &&
259
437
         (!IsASCIIHexDigit(pointer[1]) ||
260
433
          !IsASCIIHexDigit(pointer[2])))) {
261
87323
      dest += ch;
262
87323
      pointer++;
263
87323
      continue;
264
    } else {
265
425
      unsigned a = hex2bin(pointer[1]);
266
425
      unsigned b = hex2bin(pointer[2]);
267
425
      char c = static_cast<char>(a * 16 + b);
268
425
      dest += c;
269
425
      pointer += 3;
270
    }
271
  }
272
4215
  return dest;
273
}
274
275
// X-macro table of the special schemes. Each row is
//   ENTRY(key, default port, scheme string including the trailing ':')
// The "file:" row uses -1 in the port column.
#define SPECIALS(ENTRY)                                                       \
  ENTRY(ftp, 21, "ftp:")                                                      \
  ENTRY(file, -1, "file:")                                                    \
  ENTRY(http, 80, "http:")                                                    \
  ENTRY(https, 443, "https:")                                                 \
  ENTRY(ws, 80, "ws:")                                                        \
  ENTRY(wss, 443, "wss:")

// Returns true when `scheme` equals one of the scheme strings in SPECIALS.
bool IsSpecial(const std::string& scheme) {
  // Expands to: false || scheme == "ftp:" || scheme == "file:" || ...
#define V(key, port, name) || scheme == name
  return false SPECIALS(V);
#undef V
}
289
290
158235
// Returns the env->url_special_<key>_string() value for the SPECIALS row
// whose scheme string equals `scheme`. Reaching the end without a match hits
// UNREACHABLE(), so callers must only pass special schemes.
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define V(key, port, name)                                                    \
  if (scheme.compare(name) == 0) {                                            \
    return env->url_special_##key##_string();                                 \
  }
  SPECIALS(V)
#undef V
  UNREACHABLE();
}
297
298
151091
int NormalizePort(const std::string& scheme, int p) {
299
#define V(_, port, name) if (scheme == name && p == port) return -1;
300









151091
  SPECIALS(V);
301
#undef V
302
10755
  return p;
303
}
304
305
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
306
7338
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
307
7338
  size_t length = end - p;
308
6280
  return length >= 2 &&
309

13654
    IsWindowsDriveLetter(p[0], p[1]) &&
310
36
    (length == 2 ||
311
36
      p[2] == '/' ||
312
14
      p[2] == '\\' ||
313
6
      p[2] == '?' ||
314
7342
      p[2] == '#');
315
}
316
317
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs `input` through i18n::ToUnicode(). On success the converted text is
// stored in `*output` and true is returned; a negative result from the
// conversion yields false and leaves `*output` untouched.
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool ok =
      i18n::ToUnicode(&converted, input.c_str(), input.length()) >= 0;
  if (ok)
    output->assign(*converted, converted.length());
  return ok;
}

// Runs `input` through i18n::ToASCII(). Fails (returns false, `*output`
// untouched) when the conversion reports a negative result or produces an
// empty string; otherwise stores the converted text and returns true.
bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool failed =
      i18n::ToASCII(&converted, input.c_str(), input.length()) < 0 ||
      converted.length() == 0;
  if (failed)
    return false;
  output->assign(*converted, converted.length());
  return true;
}
#else  // !defined(NODE_HAVE_I18N_SUPPORT)
// Intentional no-ops if ICU is not present: the input is copied through
// unchanged and the call always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
#endif  // !defined(NODE_HAVE_I18N_SUPPORT)
347
348
#define NS_IN6ADDRSZ 16
349
350
275
void URLHost::ParseIPv6Host(const char* input, size_t length) {
351
275
  CHECK_EQ(type_, HostType::H_FAILED);
352
353
  unsigned char buf[sizeof(struct in6_addr)];
354
275
  MaybeStackBuffer<char> ipv6(length + 1);
355
275
  *(*ipv6 + length) = 0;
356
275
  memset(buf, 0, sizeof(buf));
357
275
  memcpy(*ipv6, input, sizeof(const char) * length);
358
359
275
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
360
361
275
  if (ret != 0) {
362
92
    return;
363
  }
364
365
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
366
1647
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
367
1464
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
368
  }
369
370
183
  type_ = HostType::H_IPV6;
371
}
372
373
4508
// Parses the characters in [start, end) as one part of an IPv4 address.
//
// The radix is chosen from the prefix:
//   * "0x" / "0X"                     -> hexadecimal (prefix skipped)
//   * a leading '0' with more digits  -> octal (the '0' is skipped)
//   * anything else                   -> decimal
// An empty range, or a bare "0x" prefix, yields 0.
//
// Returns the parsed value, or -1 if any character is not a valid digit for
// the chosen radix. Values too large for int64_t saturate at INT64_MAX.
//
// The conversion reads only the bytes inside [start, end); it does not
// depend on the byte at `end` being a non-digit (or on a NUL terminator).
int64_t ParseNumber(const char* start, const char* end) {
  unsigned radix = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    radix = 16;
  }
  if (start == end)
    return 0;
  if (radix == 10 && end - start > 1 && start[0] == '0') {
    start++;
    radix = 8;
  }

  int64_t value = 0;
  bool saturated = false;
  for (const char* p = start; p < end; p++) {
    const char ch = *p;
    unsigned digit;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
      digit = 10 + (ch - 'a');
    else if (ch >= 'A' && ch <= 'F')
      digit = 10 + (ch - 'A');
    else
      return -1;
    if (digit >= radix)
      return -1;

    // Keep validating the remaining characters after an overflow so that an
    // invalid digit further along is still rejected.
    if (saturated)
      continue;
    const int64_t limit =
        (INT64_MAX - static_cast<int64_t>(digit)) / static_cast<int64_t>(radix);
    if (value > limit) {
      value = INT64_MAX;
      saturated = true;
    } else {
      value = value * static_cast<int64_t>(radix) + static_cast<int64_t>(digit);
    }
  }
  return value;
}
407
408
3881
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
409
3881
  CHECK_EQ(type_, HostType::H_FAILED);
410
3881
  *is_ipv4 = false;
411
3881
  const char* pointer = input;
412
3881
  const char* mark = input;
413
3881
  const char* end = pointer + length;
414
3881
  int parts = 0;
415
3881
  uint32_t val = 0;
416
  uint64_t numbers[4];
417
3881
  int tooBigNumbers = 0;
418
3881
  if (length == 0)
419
3658
    return;
420
421
36005
  while (pointer <= end) {
422
35758
    const char ch = pointer < end ? pointer[0] : kEOL;
423
35758
    int64_t remaining = end - pointer - 1;
424

35758
    if (ch == '.' || ch == kEOL) {
425
4524
      if (++parts > static_cast<int>(arraysize(numbers)))
426
4
        return;
427
4520
      if (pointer == mark)
428
12
        return;
429
4508
      int64_t n = ParseNumber(mark, pointer);
430
4508
      if (n < 0)
431
3614
        return;
432
433
894
      if (n > 255) {
434
112
        tooBigNumbers++;
435
      }
436
894
      numbers[parts - 1] = n;
437
894
      mark = pointer + 1;
438

894
      if (ch == '.' && remaining == 0)
439
4
        break;
440
    }
441
32124
    pointer++;
442
  }
443
251
  CHECK_GT(parts, 0);
444
251
  *is_ipv4 = true;
445
446
  // If any but the last item in numbers is greater than 255, return failure.
447
  // If the last item in numbers is greater than or equal to
448
  // 256^(5 - the number of items in numbers), return failure.
449
247
  if (tooBigNumbers > 1 ||
450

558
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
451
243
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
452
28
    return;
453
  }
454
455
223
  type_ = HostType::H_IPV4;
456
223
  val = static_cast<uint32_t>(numbers[parts - 1]);
457
768
  for (int n = 0; n < parts - 1; n++) {
458
545
    double b = 3 - n;
459
545
    val +=
460
545
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
461
  }
462
463
223
  value_.ipv4 = val;
464
}
465
466
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
467
520
  CHECK_EQ(type_, HostType::H_FAILED);
468
520
  std::string output;
469
520
  output.reserve(length);
470
3053
  for (size_t i = 0; i < length; i++) {
471
2595
    const char ch = input[i];
472

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
473
62
      return;
474
    } else {
475
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
476
    }
477
  }
478
479
458
  SetOpaque(std::move(output));
480
}
481
482
5007
void URLHost::ParseHost(const char* input,
483
                        size_t length,
484
                        bool is_special,
485
                        bool unicode) {
486
5007
  CHECK_EQ(type_, HostType::H_FAILED);
487
5007
  const char* pointer = input;
488
489
5007
  if (length == 0)
490
1377
    return;
491
492
5007
  if (pointer[0] == '[') {
493
283
    if (pointer[length - 1] != ']')
494
8
      return;
495
275
    return ParseIPv6Host(++pointer, length - 2);
496
  }
497
498
4724
  if (!is_special)
499
520
    return ParseOpaqueHost(input, length);
500
501
  // First, we have to percent decode
502
4204
  std::string decoded = PercentDecode(input, length);
503
504
  // Then we have to punycode toASCII
505
4204
  if (!ToASCII(decoded, &decoded))
506
148
    return;
507
508
  // If any of the following characters are still present, we have to fail
509
90510
  for (size_t n = 0; n < decoded.size(); n++) {
510
86629
    const char ch = decoded[n];
511
86629
    if (IsForbiddenHostCodePoint(ch)) {
512
175
      return;
513
    }
514
  }
515
516
  // Check to see if it's an IPv4 IP address
517
  bool is_ipv4;
518
3881
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
519
3881
  if (is_ipv4)
520
251
    return;
521
522
  // If the unicode flag is set, run the result through punycode ToUnicode
523

3630
  if (unicode && !ToUnicode(decoded, &decoded))
524
    return;
525
526
  // It's not an IPv4 or IPv6 address, it must be a domain
527
3630
  SetDomain(std::move(decoded));
528
}
529
530
// Locates the longest run of zero-valued elements in values[0..len), which
// is used to pick the pieces replaced by "::" when serializing an IPv6
// address. Only runs of at least two elements qualify, and the first of
// several equally long runs wins. Returns a pointer to the first element of
// the chosen run, or nullptr when there is no qualifying run.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best = nullptr;
  size_t best_length = 1;  // A run has to be longer than this to count.

  size_t index = 0;
  while (index < len) {
    if (values[index] != 0) {
      index++;
      continue;
    }
    // `index` starts a run of zeros; find where it stops.
    size_t run_end = index;
    while (run_end < len && values[run_end] == 0)
      run_end++;
    const size_t run_length = run_end - index;
    if (run_length > best_length) {
      best_length = run_length;
      best = values + index;
    }
    index = run_end;
  }
  return best;
}
560
561
4494
std::string URLHost::ToStringMove() {
562
4494
  std::string return_value;
563
4494
  switch (type_) {
564
4088
    case HostType::H_DOMAIN:
565
    case HostType::H_OPAQUE:
566
4088
      return_value = std::move(value_.domain_or_opaque);
567
4088
      break;
568
406
    default:
569
406
      return_value = ToString();
570
406
      break;
571
  }
572
4494
  Reset();
573
4494
  return return_value;
574
}
575
576
406
std::string URLHost::ToString() const {
577
812
  std::string dest;
578

406
  switch (type_) {
579
    case HostType::H_DOMAIN:
580
    case HostType::H_OPAQUE:
581
      return value_.domain_or_opaque;
582
223
    case HostType::H_IPV4: {
583
223
      dest.reserve(15);
584
223
      uint32_t value = value_.ipv4;
585
1115
      for (int n = 0; n < 4; n++) {
586
892
        dest.insert(0, std::to_string(value % 256));
587
892
        if (n < 3)
588
669
          dest.insert(0, 1, '.');
589
892
        value /= 256;
590
      }
591
223
      break;
592
    }
593
183
    case HostType::H_IPV6: {
594
183
      dest.reserve(41);
595
183
      dest += '[';
596
183
      const uint16_t* start = &value_.ipv6[0];
597
      const uint16_t* compress_pointer =
598
183
          FindLongestZeroSequence(start, 8);
599
183
      bool ignore0 = false;
600
1647
      for (int n = 0; n <= 7; n++) {
601
1464
        const uint16_t* piece = &value_.ipv6[n];
602

1464
        if (ignore0 && *piece == 0)
603
1211
          continue;
604
432
        else if (ignore0)
605
173
          ignore0 = false;
606
432
        if (compress_pointer == piece) {
607
179
          dest += n == 0 ? "::" : ":";
608
179
          ignore0 = true;
609
179
          continue;
610
        }
611
        char buf[5];
612
253
        snprintf(buf, sizeof(buf), "%x", *piece);
613
253
        dest += buf;
614
253
        if (n < 7)
615
76
          dest += ':';
616
      }
617
183
      dest += ']';
618
183
      break;
619
    }
620
    case HostType::H_FAILED:
621
      break;
622
  }
623
406
  return dest;
624
}
625
626
4665
bool ParseHost(const std::string& input,
627
               std::string* output,
628
               bool is_special,
629
               bool unicode = false) {
630
4665
  if (input.empty()) {
631
94
    output->clear();
632
94
    return true;
633
  }
634
9142
  URLHost host;
635
4571
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
636
4571
  if (host.ParsingFailed())
637
489
    return false;
638
4082
  *output = host.ToStringMove();
639
4082
  return true;
640
}
641
642
9432
std::vector<std::string> FromJSStringArray(Environment* env,
643
                                           Local<Array> array) {
644
9432
  std::vector<std::string> vec;
645
9432
  if (array->Length() > 0)
646
9416
    vec.reserve(array->Length());
647
150042
  for (size_t n = 0; n < array->Length(); n++) {
648
131178
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
649
131178
    if (val->IsString()) {
650
65589
      Utf8Value value(env->isolate(), val.As<String>());
651
65589
      vec.emplace_back(*value, value.length());
652
    }
653
  }
654
9432
  return vec;
655
}
656
657
9432
// Builds a url_data from the properties of the JS object `base_obj`.
// The properties are read in this order: flags, port, scheme, username,
// password, host, query, fragment, path.
//   * flags and port are copied only when they are Int32 values;
//   * scheme is always converted with Utf8Value;
//   * the string components are copied only when they are strings, and set
//     their URL_FLAGS_HAS_* bit (see read_string below);
//   * path is copied, and URL_FLAGS_HAS_PATH set, only when it is an array.
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  const auto get_property = [&](Local<String> key) -> Local<Value> {
    return base_obj->Get(env->context(), key).ToLocalChecked();
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  Local<Value> scheme = get_property(env->scheme_string());
  base.scheme = Utf8Value(env->isolate(), scheme).out();

  // Copies the string property `name` into `base.*member`. `flag` is set
  // when the string is non-empty, or unconditionally for string values when
  // `empty_as_present` is true.
  const auto read_string = [&](std::string url_data::*member,
                               int flag,
                               Local<String> name,
                               bool empty_as_present) {
    Local<Value> value = get_property(name);
    if (!value->IsString())
      return;
    Utf8Value utf8value(env->isolate(), value.As<String>());
    (base.*member).assign(*utf8value, utf8value.length());
    if (empty_as_present || value.As<String>()->Length() != 0) {
      base.flags |= flag;
    }
  };

  read_string(&url_data::username,
              URL_FLAGS_HAS_USERNAME,
              env->username_string(),
              false);
  read_string(&url_data::password,
              URL_FLAGS_HAS_PASSWORD,
              env->password_string(),
              false);
  read_string(&url_data::host,
              URL_FLAGS_HAS_HOST,
              env->host_string(),
              true);
  read_string(&url_data::query,
              URL_FLAGS_HAS_QUERY,
              env->query_string(),
              true);
  read_string(&url_data::fragment,
              URL_FLAGS_HAS_FRAGMENT,
              env->fragment_string(),
              true);

  Local<Value> path = get_property(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
711
712
47861
// Builds a url_data from the properties of the JS object `context_obj`.
// The properties are read in this order: flags, scheme, port, username,
// password, host.
//   * Only the flag bits in kCopyFlagsMask are taken over from `flags`.
//   * username / password are read only when the corresponding
//     URL_FLAGS_HAS_* bit was copied, and must then be strings (CHECK).
//   * scheme and host are copied only when they are strings; port only when
//     it is an Int32.
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;

  const auto get_property = [&](Local<String> key) -> Local<Value> {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  const auto assign_utf8 = [&](std::string* target, Local<Value> value) {
    Utf8Value utf8(env->isolate(), value);
    target->assign(*utf8, utf8.length());
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = get_property(env->scheme_string());
  if (scheme->IsString())
    assign_utf8(&context.scheme, scheme);

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = get_property(env->username_string());
    CHECK(username->IsString());
    assign_utf8(&context.username, username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = get_property(env->password_string());
    CHECK(password->IsString());
    assign_utf8(&context.password, password);
  }

  Local<Value> host = get_property(env->host_string());
  if (host->IsString())
    assign_utf8(&context.host, host);

  return context;
}
760
761
// Single dot segment can be ".", "%2e", or "%2E"
bool IsSingleDotSegment(const std::string& str) {
  if (str.size() == 1)
    return str[0] == '.';
  if (str.size() == 3) {
    // Percent-encoded dot; the hex digit 'e' may be in either case.
    const bool is_e = str[2] == 'e' || str[2] == 'E';
    return str[0] == '%' && str[1] == '2' && is_e;
  }
  return false;
}
774
775
// Double dot segment can be:
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
//
// In other words: exactly two dots, each written either literally or as a
// percent-encoded "%2e" / "%2E", and nothing else.
bool IsDoubleDotSegment(const std::string& str) {
  const size_t size = str.size();
  size_t pos = 0;
  for (int dots = 0; dots < 2; dots++) {
    if (pos < size && str[pos] == '.') {
      pos += 1;
    } else if (size - pos >= 3 &&
               str[pos] == '%' &&
               str[pos + 1] == '2' &&
               (str[pos + 2] == 'e' || str[pos + 2] == 'E')) {
      pos += 3;
    } else {
      return false;
    }
  }
  // Both dots must account for the whole segment.
  return pos == size;
}
804
805
11741
void ShortenUrlPath(struct url_data* url) {
806
11741
  if (url->path.empty()) return;
807


11929
  if (url->path.size() == 1 && url->scheme == "file:" &&
808
578
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
809
11351
  url->path.pop_back();
810
}
811
812
}  // anonymous namespace
813
814
253508
void URL::Parse(const char* input,
815
                size_t len,
816
                enum url_parse_state state_override,
817
                struct url_data* url,
818
                bool has_url,
819
                const struct url_data* base,
820
                bool has_base) {
821
253508
  const char* p = input;
822
253508
  const char* end = input + len;
823
824
253508
  if (!has_url) {
825
165349
    for (const char* ptr = p; ptr < end; ptr++) {
826
165330
      if (IsC0ControlOrSpace(*ptr))
827
56
        p++;
828
      else
829
165274
        break;
830
    }
831
165341
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
832
165322
      if (IsC0ControlOrSpace(*ptr))
833
48
        end--;
834
      else
835
165274
        break;
836
    }
837
165293
    input = p;
838
165293
    len = end - p;
839
  }
840
841
  // The spec says we should strip out any ASCII tabs or newlines.
842
  // In those cases, we create another std::string instance with the filtered
843
  // contents, but in the general case we avoid the overhead.
844
253508
  std::string whitespace_stripped;
845
16556670
  for (const char* ptr = p; ptr < end; ptr++) {
846
16303332
    if (!IsASCIITabOrNewline(*ptr))
847
16303162
      continue;
848
    // Hit tab or newline. Allocate storage, copy what we have until now,
849
    // and then iterate and filter all similar characters out.
850
170
    whitespace_stripped.reserve(len - 1);
851
170
    whitespace_stripped.assign(p, ptr - p);
852
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
853
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
854
853
      if (!IsASCIITabOrNewline(*ptr))
855
769
        whitespace_stripped += *ptr;
856
    }
857
858
    // Update variables like they should have looked like if the string
859
    // had been stripped of whitespace to begin with.
860
170
    input = whitespace_stripped.c_str();
861
170
    len = whitespace_stripped.size();
862
170
    p = input;
863
170
    end = input + len;
864
170
    break;
865
  }
866
867
253508
  bool atflag = false;  // Set when @ has been seen.
868
253508
  bool square_bracket_flag = false;  // Set inside of [...]
869
253508
  bool password_token_seen_flag = false;  // Set after a : after an username.
870
871
253508
  std::string buffer;
872
873
  // Set the initial parse state.
874
253508
  const bool has_state_override = state_override != kUnknownState;
875
253508
  enum url_parse_state state = has_state_override ? state_override :
876
                                                    kSchemeStart;
877
878

253508
  if (state < kSchemeStart || state > kFragment) {
879
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
880
    return;
881
  }
882
883
17132205
  while (p <= end) {
884
16887500
    const char ch = p < end ? p[0] : kEOL;
885
16887500
    bool special = (url->flags & URL_FLAGS_SPECIAL);
886
    bool cannot_be_base;
887

16887500
    bool special_back_slash = (special && ch == '\\');
888
889





16887500
    switch (state) {
890
165376
      case kSchemeStart:
891
165376
        if (IsASCIIAlpha(ch)) {
892
151811
          buffer += ASCIILowercase(ch);
893
151811
          state = kScheme;
894
13565
        } else if (!has_state_override) {
895
13555
          state = kNoScheme;
896
13555
          continue;
897
        } else {
898
10
          url->flags |= URL_FLAGS_FAILED;
899
10
          return;
900
        }
901
151811
        break;
902
626264
      case kScheme:
903


626264
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
904
474453
          buffer += ASCIILowercase(ch);
905

151811
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
906

149781
          if (has_state_override && buffer.size() == 0) {
907
            url->flags |= URL_FLAGS_TERMINATED;
908
            return;
909
          }
910
149781
          buffer += ':';
911
912
149781
          bool new_is_special = IsSpecial(buffer);
913
914
149781
          if (has_state_override) {
915
45
            if ((special != new_is_special) ||
916
45
                ((buffer == "file:") &&
917
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
918
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
919


116
                  (url->port != -1))) ||
920
45
                  (url->scheme == "file:" && url->host.empty())) {
921
32
              url->flags |= URL_FLAGS_TERMINATED;
922
32
              return;
923
            }
924
          }
925
926
149749
          url->scheme = std::move(buffer);
927
149749
          url->port = NormalizePort(url->scheme, url->port);
928
149749
          if (new_is_special) {
929
143961
            url->flags |= URL_FLAGS_SPECIAL;
930
143961
            special = true;
931
          } else {
932
5788
            url->flags &= ~URL_FLAGS_SPECIAL;
933
5788
            special = false;
934
          }
935

149749
          special_back_slash = (special && ch == '\\');
936
149749
          buffer.clear();
937
149749
          if (has_state_override)
938
33
            return;
939
149716
          if (url->scheme == "file:") {
940
140287
            state = kFile;
941
3657
          } else if (special &&
942

13086
                     has_base &&
943
1033
                     url->scheme == base->scheme) {
944
323
            state = kSpecialRelativeOrAuthority;
945
9106
          } else if (special) {
946
3334
            state = kSpecialAuthoritySlashes;
947

5772
          } else if (p + 1 < end && p[1] == '/') {
948
716
            state = kPathOrAuthority;
949
716
            p++;
950
          } else {
951
5056
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
952
5056
            url->flags |= URL_FLAGS_HAS_PATH;
953
5056
            url->path.emplace_back("");
954
5056
            state = kCannotBeBase;
955
149716
          }
956
2030
        } else if (!has_state_override) {
957
2022
          buffer.clear();
958
2022
          state = kNoScheme;
959
2022
          p = input;
960
2022
          continue;
961
        } else {
962
8
          url->flags |= URL_FLAGS_FAILED;
963
8
          return;
964
        }
965
624169
        break;
966
15577
      case kNoScheme:
967

15577
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
968

15577
        if (!has_base || (cannot_be_base && ch != '#')) {
969
7819
          url->flags |= URL_FLAGS_FAILED;
970
7819
          return;
971

7758
        } else if (cannot_be_base && ch == '#') {
972
28
          url->scheme = base->scheme;
973
28
          if (IsSpecial(url->scheme)) {
974
            url->flags |= URL_FLAGS_SPECIAL;
975
            special = true;
976
          } else {
977
28
            url->flags &= ~URL_FLAGS_SPECIAL;
978
28
            special = false;
979
          }
980

28
          special_back_slash = (special && ch == '\\');
981
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
982
28
            url->flags |= URL_FLAGS_HAS_PATH;
983
28
            url->path = base->path;
984
          }
985
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
986
4
            url->flags |= URL_FLAGS_HAS_QUERY;
987
4
            url->query = base->query;
988
          }
989
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
990
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
991
            url->fragment = base->fragment;
992
          }
993
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
994
28
          state = kFragment;
995

15460
        } else if (has_base &&
996
7730
                   base->scheme != "file:") {
997
371
          state = kRelative;
998
371
          continue;
999
        } else {
1000
7359
          url->scheme = "file:";
1001
7359
          url->flags |= URL_FLAGS_SPECIAL;
1002
7359
          special = true;
1003
7359
          state = kFile;
1004

7359
          special_back_slash = (special && ch == '\\');
1005
7359
          continue;
1006
        }
1007
28
        break;
1008
323
      case kSpecialRelativeOrAuthority:
1009

323
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1010
291
          state = kSpecialAuthorityIgnoreSlashes;
1011
291
          p++;
1012
        } else {
1013
32
          state = kRelative;
1014
32
          continue;
1015
        }
1016
291
        break;
1017
716
      case kPathOrAuthority:
1018
716
        if (ch == '/') {
1019
548
          state = kAuthority;
1020
        } else {
1021
168
          state = kPath;
1022
168
          continue;
1023
        }
1024
548
        break;
1025
403
      case kRelative:
1026
403
        url->scheme = base->scheme;
1027
403
        if (IsSpecial(url->scheme)) {
1028
303
          url->flags |= URL_FLAGS_SPECIAL;
1029
303
          special = true;
1030
        } else {
1031
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1032
100
          special = false;
1033
        }
1034

403
        special_back_slash = (special && ch == '\\');
1035

403
        switch (ch) {
1036
18
          case kEOL:
1037
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1038
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1039
4
              url->username = base->username;
1040
            }
1041
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1042
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1043
4
              url->password = base->password;
1044
            }
1045
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1046
16
              url->flags |= URL_FLAGS_HAS_HOST;
1047
16
              url->host = base->host;
1048
            }
1049
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1050
              url->flags |= URL_FLAGS_HAS_QUERY;
1051
              url->query = base->query;
1052
            }
1053
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1054
18
              url->flags |= URL_FLAGS_HAS_PATH;
1055
18
              url->path = base->path;
1056
            }
1057
18
            url->port = base->port;
1058
18
            break;
1059
124
          case '/':
1060
124
            state = kRelativeSlash;
1061
124
            break;
1062
38
          case '?':
1063
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1064
              url->flags |= URL_FLAGS_HAS_USERNAME;
1065
              url->username = base->username;
1066
            }
1067
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1068
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1069
              url->password = base->password;
1070
            }
1071
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1072
34
              url->flags |= URL_FLAGS_HAS_HOST;
1073
34
              url->host = base->host;
1074
            }
1075
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1076
38
              url->flags |= URL_FLAGS_HAS_PATH;
1077
38
              url->path = base->path;
1078
            }
1079
38
            url->port = base->port;
1080
38
            state = kQuery;
1081
38
            break;
1082
38
          case '#':
1083
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1084
              url->flags |= URL_FLAGS_HAS_USERNAME;
1085
              url->username = base->username;
1086
            }
1087
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1088
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1089
              url->password = base->password;
1090
            }
1091
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1092
34
              url->flags |= URL_FLAGS_HAS_HOST;
1093
34
              url->host = base->host;
1094
            }
1095
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1096
              url->flags |= URL_FLAGS_HAS_QUERY;
1097
              url->query = base->query;
1098
            }
1099
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1100
38
              url->flags |= URL_FLAGS_HAS_PATH;
1101
38
              url->path = base->path;
1102
            }
1103
38
            url->port = base->port;
1104
38
            state = kFragment;
1105
38
            break;
1106
185
          default:
1107
185
            if (special_back_slash) {
1108
18
              state = kRelativeSlash;
1109
            } else {
1110
167
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1111
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1112
1
                url->username = base->username;
1113
              }
1114
167
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1115
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1116
1
                url->password = base->password;
1117
              }
1118
167
              if (base->flags & URL_FLAGS_HAS_HOST) {
1119
147
                url->flags |= URL_FLAGS_HAS_HOST;
1120
147
                url->host = base->host;
1121
              }
1122
167
              if (base->flags & URL_FLAGS_HAS_PATH) {
1123
167
                url->flags |= URL_FLAGS_HAS_PATH;
1124
167
                url->path = base->path;
1125
167
                ShortenUrlPath(url);
1126
              }
1127
167
              url->port = base->port;
1128
167
              state = kPath;
1129
167
              continue;
1130
            }
1131
        }
1132
236
        break;
1133
142
      case kRelativeSlash:
1134


142
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1135
22
          state = kSpecialAuthorityIgnoreSlashes;
1136
120
        } else if (ch == '/') {
1137
6
          state = kAuthority;
1138
        } else {
1139
114
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1140
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1141
8
            url->username = base->username;
1142
          }
1143
114
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1144
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1145
4
            url->password = base->password;
1146
          }
1147
114
          if (base->flags & URL_FLAGS_HAS_HOST) {
1148
106
            url->flags |= URL_FLAGS_HAS_HOST;
1149
106
            url->host = base->host;
1150
          }
1151
114
          url->port = base->port;
1152
114
          state = kPath;
1153
114
          continue;
1154
        }
1155
28
        break;
1156
3334
      case kSpecialAuthoritySlashes:
1157
3334
        state = kSpecialAuthorityIgnoreSlashes;
1158

3334
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1159
3185
          p++;
1160
        } else {
1161
149
          continue;
1162
        }
1163
3185
        break;
1164
3724
      case kSpecialAuthorityIgnoreSlashes:
1165

3724
        if (ch != '/' && ch != '\\') {
1166
3647
          state = kAuthority;
1167
3647
          continue;
1168
        }
1169
77
        break;
1170
90460
      case kAuthority:
1171
90460
        if (ch == '@') {
1172
565
          if (atflag) {
1173
41
            buffer.reserve(buffer.size() + 3);
1174
41
            buffer.insert(0, "%40");
1175
          }
1176
565
          atflag = true;
1177
565
          size_t blen = buffer.size();
1178

565
          if (blen > 0 && buffer[0] != ':') {
1179
469
            url->flags |= URL_FLAGS_HAS_USERNAME;
1180
          }
1181
6652
          for (size_t n = 0; n < blen; n++) {
1182
6087
            const char bch = buffer[n];
1183
6087
            if (bch == ':') {
1184
444
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1185
444
              if (!password_token_seen_flag) {
1186
428
                password_token_seen_flag = true;
1187
428
                continue;
1188
              }
1189
            }
1190
5659
            if (password_token_seen_flag) {
1191
2722
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1192
            } else {
1193
2937
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1194
            }
1195
          }
1196
565
          buffer.clear();
1197

89895
        } else if (ch == kEOL ||
1198
85764
                   ch == '/' ||
1199
85732
                   ch == '?' ||
1200
85714
                   ch == '#' ||
1201
                   special_back_slash) {
1202

4201
          if (atflag && buffer.size() == 0) {
1203
52
            url->flags |= URL_FLAGS_FAILED;
1204
52
            return;
1205
          }
1206
4149
          p -= buffer.size() + 1;
1207
4149
          buffer.clear();
1208
4149
          state = kHost;
1209
        } else {
1210
85694
          buffer += ch;
1211
        }
1212
90408
        break;
1213
83153
      case kHost:
1214
      case kHostname:
1215

83153
        if (has_state_override && url->scheme == "file:") {
1216
12
          state = kFileHost;
1217
12
          continue;
1218

83141
        } else if (ch == ':' && !square_bracket_flag) {
1219
1382
          if (buffer.size() == 0) {
1220
24
            url->flags |= URL_FLAGS_FAILED;
1221
24
            return;
1222
          }
1223
1358
          if (state_override == kHostname) {
1224
4
            return;
1225
          }
1226
1354
          url->flags |= URL_FLAGS_HAS_HOST;
1227
1354
          if (!ParseHost(buffer, &url->host, special)) {
1228
5
            url->flags |= URL_FLAGS_FAILED;
1229
5
            return;
1230
          }
1231
1349
          buffer.clear();
1232
1349
          state = kPort;
1233

81759
        } else if (ch == kEOL ||
1234
78702
                   ch == '/' ||
1235
78662
                   ch == '?' ||
1236
78636
                   ch == '#' ||
1237
                   special_back_slash) {
1238
3147
          p--;
1239

3147
          if (special && buffer.size() == 0) {
1240
21
            url->flags |= URL_FLAGS_FAILED;
1241
21
            return;
1242
          }
1243
331
          if (has_state_override &&
1244

3495
              buffer.size() == 0 &&
1245
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1246
38
               url->port != -1)) {
1247
8
            url->flags |= URL_FLAGS_TERMINATED;
1248
8
            return;
1249
          }
1250
3118
          url->flags |= URL_FLAGS_HAS_HOST;
1251
3118
          if (!ParseHost(buffer, &url->host, special)) {
1252
432
            url->flags |= URL_FLAGS_FAILED;
1253
432
            return;
1254
          }
1255
2686
          buffer.clear();
1256
2686
          state = kPathStart;
1257
2686
          if (has_state_override) {
1258
227
            return;
1259
          }
1260
        } else {
1261
78612
          if (ch == '[')
1262
277
            square_bracket_flag = true;
1263
78612
          if (ch == ']')
1264
273
            square_bracket_flag = false;
1265
78612
          buffer += ch;
1266
        }
1267
82420
        break;
1268
7741
      case kPort:
1269
7741
        if (IsASCIIDigit(ch)) {
1270
6323
          buffer += ch;
1271

1418
        } else if (has_state_override ||
1272
902
                   ch == kEOL ||
1273
36
                   ch == '/' ||
1274
36
                   ch == '?' ||
1275
36
                   ch == '#' ||
1276
                   special_back_slash) {
1277
1382
          if (buffer.size() > 0) {
1278
1368
            unsigned port = 0;
1279
            // the condition port <= 0xffff prevents integer overflow
1280

7475
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1281
6107
              port = port * 10 + buffer[i] - '0';
1282
1368
            if (port > 0xffff) {
1283
              // TODO(TimothyGu): This hack is currently needed for the host
1284
              // setter since it needs access to hostname if it is valid, and
1285
              // if the FAILED flag is set the entire response to JS layer
1286
              // will be empty.
1287
26
              if (state_override == kHost)
1288
2
                url->port = -1;
1289
              else
1290
24
                url->flags |= URL_FLAGS_FAILED;
1291
26
              return;
1292
            }
1293
            // the port is valid
1294
1342
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1295
1342
            if (url->port == -1)
1296
47
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1297
1342
            buffer.clear();
1298
14
          } else if (has_state_override) {
1299
            // TODO(TimothyGu): Similar case as above.
1300
6
            if (state_override == kHost)
1301
2
              url->port = -1;
1302
            else
1303
4
              url->flags |= URL_FLAGS_TERMINATED;
1304
6
            return;
1305
          }
1306
1350
          state = kPathStart;
1307
1350
          continue;
1308
        } else {
1309
36
          url->flags |= URL_FLAGS_FAILED;
1310
36
          return;
1311
        }
1312
6323
        break;
1313
147646
      case kFile:
1314
147646
        url->scheme = "file:";
1315
147646
        url->host.clear();
1316
147646
        url->flags |= URL_FLAGS_HAS_HOST;
1317

147646
        if (ch == '/' || ch == '\\') {
1318
140405
          state = kFileSlash;
1319

7241
        } else if (has_base && base->scheme == "file:") {
1320

7222
          switch (ch) {
1321
4
            case kEOL:
1322
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1323
4
                url->host = base->host;
1324
              }
1325
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1326
4
                url->flags |= URL_FLAGS_HAS_PATH;
1327
4
                url->path = base->path;
1328
              }
1329
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1330
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1331
4
                url->query = base->query;
1332
              }
1333
4
              break;
1334
4
            case '?':
1335
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1336
4
                url->host = base->host;
1337
              }
1338
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1339
4
                url->flags |= URL_FLAGS_HAS_PATH;
1340
4
                url->path = base->path;
1341
              }
1342
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1343
4
              url->query.clear();
1344
4
              state = kQuery;
1345
4
              break;
1346
4
            case '#':
1347
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1348
4
                url->host = base->host;
1349
              }
1350
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1351
4
                url->flags |= URL_FLAGS_HAS_PATH;
1352
4
                url->path = base->path;
1353
              }
1354
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1355
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1356
4
                url->query = base->query;
1357
              }
1358
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1359
4
              url->fragment.clear();
1360
4
              state = kFragment;
1361
4
              break;
1362
7210
            default:
1363
7210
              url->query.clear();
1364
7210
              if (base->flags & URL_FLAGS_HAS_HOST) {
1365
7210
                url->host = base->host;
1366
              }
1367
7210
              if (base->flags & URL_FLAGS_HAS_PATH) {
1368
7210
                url->flags |= URL_FLAGS_HAS_PATH;
1369
7210
                url->path = base->path;
1370
              }
1371
7210
              if (!StartsWithWindowsDriveLetter(p, end)) {
1372
7186
                ShortenUrlPath(url);
1373
              } else {
1374
24
                url->path.clear();
1375
              }
1376
7210
              state = kPath;
1377
7210
              continue;
1378
          }
1379
        } else {
1380
19
          state = kPath;
1381
19
          continue;
1382
        }
1383
140417
        break;
1384
140405
      case kFileSlash:
1385

140405
        if (ch == '/' || ch == '\\') {
1386
140223
          state = kFileHost;
1387
        } else {
1388

182
          if (has_base && base->scheme == "file:") {
1389
128
            url->flags |= URL_FLAGS_HAS_HOST;
1390
128
            url->host = base->host;
1391

242
            if (!StartsWithWindowsDriveLetter(p, end) &&
1392
114
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1393
4
              url->flags |= URL_FLAGS_HAS_PATH;
1394
4
              url->path.push_back(base->path[0]);
1395
            }
1396
          }
1397
182
          state = kPath;
1398
182
          continue;
1399
        }
1400
140223
        break;
1401
141340
      case kFileHost:
1402

141340
        if (ch == kEOL ||
1403
1115
            ch == '/' ||
1404
1105
            ch == '\\' ||
1405
1105
            ch == '?' ||
1406
            ch == '#') {
1407
140223
          if (!has_state_override &&
1408

280458
              buffer.size() == 2 &&
1409
22
              IsWindowsDriveLetter(buffer)) {
1410
12
            state = kPath;
1411
140223
          } else if (buffer.size() == 0) {
1412
140030
            url->flags |= URL_FLAGS_HAS_HOST;
1413
140030
            url->host.clear();
1414
140030
            if (has_state_override)
1415
4
              return;
1416
140026
            state = kPathStart;
1417
          } else {
1418
193
            std::string host;
1419
193
            if (!ParseHost(buffer, &host, special)) {
1420
52
              url->flags |= URL_FLAGS_FAILED;
1421
52
              return;
1422
            }
1423
141
            if (host == "localhost")
1424
37
              host.clear();
1425
141
            url->flags |= URL_FLAGS_HAS_HOST;
1426
141
            url->host = host;
1427
141
            if (has_state_override)
1428
4
              return;
1429
137
            buffer.clear();
1430
137
            state = kPathStart;
1431
          }
1432
140175
          continue;
1433
        } else {
1434
1105
          buffer += ch;
1435
        }
1436
1105
        break;
1437
231410
      case kPathStart:
1438
231410
        if (IsSpecial(url->scheme)) {
1439
230862
          state = kPath;
1440

230862
          if (ch != '/' && ch != '\\') {
1441
88178
            continue;
1442
          }
1443

548
        } else if (!has_state_override && ch == '?') {
1444
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1445
6
          url->query.clear();
1446
6
          state = kQuery;
1447

542
        } else if (!has_state_override && ch == '#') {
1448
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1449
6
          url->fragment.clear();
1450
6
          state = kFragment;
1451
536
        } else if (ch != kEOL) {
1452
459
          state = kPath;
1453
459
          if (ch != '/') {
1454
35
            continue;
1455
          }
1456

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1457
2
          url->flags |= URL_FLAGS_HAS_PATH;
1458
2
          url->path.emplace_back("");
1459
        }
1460
143197
        break;
1461
15135886
      case kPath:
1462

15135886
        if (ch == kEOL ||
1463
13678345
            ch == '/' ||
1464
13678275
            special_back_slash ||
1465

13678275
            (!has_state_override && (ch == '?' || ch == '#'))) {
1466
1458474
          if (IsDoubleDotSegment(buffer)) {
1467
4388
            ShortenUrlPath(url);
1468

4388
            if (ch != '/' && !special_back_slash) {
1469
280
              url->flags |= URL_FLAGS_HAS_PATH;
1470
280
              url->path.emplace_back("");
1471
            }
1472
1457874
          } else if (IsSingleDotSegment(buffer) &&
1473

1457874
                     ch != '/' && !special_back_slash) {
1474
1072
            url->flags |= URL_FLAGS_HAS_PATH;
1475
1072
            url->path.emplace_back("");
1476
1453014
          } else if (!IsSingleDotSegment(buffer)) {
1477
2895125
            if (url->scheme == "file:" &&
1478
1632671
                url->path.empty() &&
1479

3082969
                buffer.size() == 2 &&
1480
100
                IsWindowsDriveLetter(buffer)) {
1481
98
              buffer[1] = ':';
1482
            }
1483
1450298
            url->flags |= URL_FLAGS_HAS_PATH;
1484
1450298
            url->path.emplace_back(std::move(buffer));
1485
          }
1486
1458474
          buffer.clear();
1487
2916948
          if (ch == '?') {
1488
808
            url->flags |= URL_FLAGS_HAS_QUERY;
1489
808
            url->query.clear();
1490
808
            state = kQuery;
1491
1457666
          } else if (ch == '#') {
1492
55
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1493
55
            url->fragment.clear();
1494
55
            state = kFragment;
1495
          }
1496
        } else {
1497
13677412
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1498
        }
1499
15135886
        break;
1500
49273
      case kCannotBeBase:
1501
49273
        switch (ch) {
1502
4
          case '?':
1503
4
            state = kQuery;
1504
4
            break;
1505
10
          case '#':
1506
10
            state = kFragment;
1507
10
            break;
1508
49259
          default:
1509
49259
            if (url->path.empty())
1510
              url->path.emplace_back("");
1511
49259
            else if (ch != kEOL)
1512
44217
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1513
        }
1514
49273
        break;
1515
40072
      case kQuery:
1516

40072
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1517
1019
          url->flags |= URL_FLAGS_HAS_QUERY;
1518
1019
          url->query = std::move(buffer);
1519
1019
          buffer.clear();
1520
1408
          if (ch == '#')
1521
389
            state = kFragment;
1522
        } else {
1523
39053
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1524
                                                QUERY_ENCODE_SET_NONSPECIAL);
1525
        }
1526
40072
        break;
1527
4255
      case kFragment:
1528
4255
        switch (ch) {
1529
604
          case kEOL:
1530
604
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1531
604
            url->fragment = std::move(buffer);
1532
604
            break;
1533
3651
          default:
1534
3651
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1535
        }
1536
4255
        break;
1537
      default:
1538
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1539
        return;
1540
    }
1541
1542
16613952
    p++;
1543
  }
1544
}  // NOLINT(readability/fn_size)
1545
1546
// https://url.spec.whatwg.org/#url-serializing
1547
40354
std::string URL::SerializeURL(const url_data& url,
1548
                              bool exclude = false) {
1549
40354
  std::string output;
1550
40354
  output.reserve(
1551
    10 +  // We generally insert < 10 separator characters between URL parts
1552
40354
    url.scheme.size() +
1553
40354
    url.username.size() +
1554
40354
    url.password.size() +
1555
40354
    url.host.size() +
1556
40354
    url.query.size() +
1557
40354
    url.fragment.size() +
1558
40354
    url.href.size() +
1559
40354
    std::accumulate(
1560
        url.path.begin(),
1561
        url.path.end(),
1562
        0,
1563
435853
        [](size_t sum, const auto& str) { return sum + str.size(); }));
1564
1565
40354
  output += url.scheme;
1566
40354
  if (url.flags & URL_FLAGS_HAS_HOST) {
1567
40354
    output += "//";
1568
40354
    if (url.flags & URL_FLAGS_HAS_USERNAME ||
1569
40354
        url.flags & URL_FLAGS_HAS_PASSWORD) {
1570
      if (url.flags & URL_FLAGS_HAS_USERNAME) {
1571
        output += url.username;
1572
      }
1573
      if (url.flags & URL_FLAGS_HAS_PASSWORD) {
1574
        output += ":" + url.password;
1575
      }
1576
      output += "@";
1577
    }
1578
40354
    output += url.host;
1579
40354
    if (url.port != -1) {
1580
      output += ":" + std::to_string(url.port);
1581
    }
1582
  }
1583
40354
  if (url.flags & URL_FLAGS_CANNOT_BE_BASE) {
1584
    output += url.path[0];
1585
  } else {
1586
    if (!(url.flags & URL_FLAGS_HAS_HOST) &&
1587

40354
          url.path.size() > 1 &&
1588
          url.path[0].empty()) {
1589
      output += "/.";
1590
    }
1591
435853
    for (size_t i = 1; i < url.path.size(); i++) {
1592
395499
      output += "/" + url.path[i];
1593
    }
1594
  }
1595
40354
  if (url.flags & URL_FLAGS_HAS_QUERY) {
1596
    output += "?" + url.query;
1597
  }
1598

40354
  if (!exclude && (url.flags & URL_FLAGS_HAS_FRAGMENT)) {
1599
    output += "#" + url.fragment;
1600
  }
1601
40354
  output.shrink_to_fit();
1602
40354
  return output;
1603
}
1604
1605
namespace {
1606
164248
void SetArgs(Environment* env,
1607
             Local<Value> argv[ARG_COUNT],
1608
             const struct url_data& url) {
1609
164248
  Isolate* isolate = env->isolate();
1610
164248
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1611
328496
  argv[ARG_PROTOCOL] =
1612
164248
      url.flags & URL_FLAGS_SPECIAL ?
1613
158235
          GetSpecial(env, url.scheme) :
1614
6013
          OneByteString(isolate, url.scheme.c_str());
1615
164248
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1616
1224
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1617
164248
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1618
1184
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1619
164248
  if (url.flags & URL_FLAGS_HAS_HOST)
1620
317862
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1621
164248
  if (url.flags & URL_FLAGS_HAS_QUERY)
1622
2054
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1623
164248
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1624
1200
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1625
164248
  if (url.port > -1)
1626
2928
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1627
164248
  if (url.flags & URL_FLAGS_HAS_PATH)
1628
327304
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1629
164248
}
1630
1631
172771
void Parse(Environment* env,
1632
           Local<Value> recv,
1633
           const char* input,
1634
           size_t len,
1635
           enum url_parse_state state_override,
1636
           Local<Value> base_obj,
1637
           Local<Value> context_obj,
1638
           Local<Function> cb,
1639
           Local<Value> error_cb) {
1640
172771
  Isolate* isolate = env->isolate();
1641
172771
  Local<Context> context = env->context();
1642
172771
  HandleScope handle_scope(isolate);
1643
172771
  Context::Scope context_scope(context);
1644
1645
172771
  const bool has_context = context_obj->IsObject();
1646
172771
  const bool has_base = base_obj->IsObject();
1647
1648
172771
  url_data base;
1649
172771
  url_data url;
1650
172771
  if (has_context)
1651
47861
    url = HarvestContext(env, context_obj.As<Object>());
1652
172771
  if (has_base)
1653
9432
    base = HarvestBase(env, base_obj.As<Object>());
1654
1655
172771
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1656

172771
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1657
47861
      ((state_override != kUnknownState) &&
1658
47861
       (url.flags & URL_FLAGS_TERMINATED)))
1659
44
    return;
1660
1661
  // Define the return value placeholders
1662
172727
  const Local<Value> undef = Undefined(isolate);
1663
172727
  const Local<Value> null = Null(isolate);
1664
172727
  if (!(url.flags & URL_FLAGS_FAILED)) {
1665
    Local<Value> argv[] = {
1666
      undef,
1667
      undef,
1668
      undef,
1669
      undef,
1670
      null,  // host defaults to null
1671
      null,  // port defaults to null
1672
      undef,
1673
      null,  // query defaults to null
1674
      null,  // fragment defaults to null
1675
164248
    };
1676
164248
    SetArgs(env, argv, url);
1677
328496
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
1678
8479
  } else if (error_cb->IsFunction()) {
1679
8349
    Local<Value> argv[2] = { undef, undef };
1680
8349
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1681
8349
    argv[ERR_ARG_INPUT] =
1682
16698
      String::NewFromUtf8(env->isolate(), input).ToLocalChecked();
1683
8349
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
1684
8349
        .FromMaybe(Local<Value>());
1685
  }
1686
}
1687
1688
172771
void Parse(const FunctionCallbackInfo<Value>& args) {
1689
172771
  Environment* env = Environment::GetCurrent(args);
1690
172771
  CHECK_GE(args.Length(), 5);
1691
345542
  CHECK(args[0]->IsString());  // input
1692


469560
  CHECK(args[2]->IsUndefined() ||  // base context
1693
        args[2]->IsNull() ||
1694
        args[2]->IsObject());
1695


489125
  CHECK(args[3]->IsUndefined() ||  // context
1696
        args[3]->IsNull() ||
1697
        args[3]->IsObject());
1698
172771
  CHECK(args[4]->IsFunction());  // complete callback
1699

470452
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1700
1701
172771
  Utf8Value input(env->isolate(), args[0]);
1702
172771
  enum url_parse_state state_override = kUnknownState;
1703
172771
  if (args[1]->IsNumber()) {
1704
172771
    state_override = static_cast<enum url_parse_state>(
1705
345542
        args[1]->Uint32Value(env->context()).FromJust());
1706
  }
1707
1708
345542
  Parse(env, args.This(),
1709
172771
        *input, input.length(),
1710
        state_override,
1711
        args[2],
1712
        args[3],
1713
345542
        args[4].As<Function>(),
1714
        args[5]);
1715
172771
}
1716
1717
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1718
92
  Environment* env = Environment::GetCurrent(args);
1719
92
  CHECK_GE(args.Length(), 1);
1720
184
  CHECK(args[0]->IsString());
1721
184
  Utf8Value value(env->isolate(), args[0]);
1722
92
  std::string output;
1723
92
  size_t len = value.length();
1724
92
  output.reserve(len);
1725
756
  for (size_t n = 0; n < len; n++) {
1726
664
    const char ch = (*value)[n];
1727
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1728
  }
1729
276
  args.GetReturnValue().Set(
1730
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1731
92
}
1732
1733
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1734
229
  Environment* env = Environment::GetCurrent(args);
1735
229
  CHECK_GE(args.Length(), 1);
1736
458
  CHECK(args[0]->IsString());
1737
229
  Utf8Value value(env->isolate(), args[0]);
1738
1739
229
  URLHost host;
1740
  // Assuming the host is used for a special scheme.
1741
229
  host.ParseHost(*value, value.length(), true);
1742
229
  if (host.ParsingFailed()) {
1743
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1744
12
    return;
1745
  }
1746
217
  std::string out = host.ToStringMove();
1747
651
  args.GetReturnValue().Set(
1748
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1749
}
1750
1751
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1752
207
  Environment* env = Environment::GetCurrent(args);
1753
207
  CHECK_GE(args.Length(), 1);
1754
414
  CHECK(args[0]->IsString());
1755
207
  Utf8Value value(env->isolate(), args[0]);
1756
1757
207
  URLHost host;
1758
  // Assuming the host is used for a special scheme.
1759
207
  host.ParseHost(*value, value.length(), true, true);
1760
207
  if (host.ParsingFailed()) {
1761
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1762
12
    return;
1763
  }
1764
195
  std::string out = host.ToStringMove();
1765
585
  args.GetReturnValue().Set(
1766
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1767
}
1768
1769
625
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1770
625
  Environment* env = Environment::GetCurrent(args);
1771
625
  CHECK_EQ(args.Length(), 1);
1772
625
  CHECK(args[0]->IsFunction());
1773
1250
  env->set_url_constructor_function(args[0].As<Function>());
1774
625
}
1775
1776
625
void Initialize(Local<Object> target,
1777
                Local<Value> unused,
1778
                Local<Context> context,
1779
                void* priv) {
1780
625
  Environment* env = Environment::GetCurrent(context);
1781
625
  env->SetMethod(target, "parse", Parse);
1782
625
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1783
625
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1784
625
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1785
625
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1786
1787
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1788
16875
  FLAGS(XX)
1789
#undef XX
1790
1791
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1792
26250
  PARSESTATES(XX)
1793
#undef XX
1794
625
}
1795
}  // namespace
1796
1797
4946
// Registers each native callback that Initialize() exposes with `registry`,
// in the same order in which Initialize() installs them.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  ExternalReferenceRegistry& references = *registry;

  references.Register(Parse);
  references.Register(EncodeAuthSet);
  references.Register(DomainToASCII);
  references.Register(DomainToUnicode);
  references.Register(SetURLConstructor);
}
1804
1805
8
std::string URL::ToFilePath() const {
1806
8
  if (context_.scheme != "file:") {
1807
1
    return "";
1808
  }
1809
1810
#ifdef _WIN32
1811
  const char* slash = "\\";
1812
  auto is_slash = [] (char ch) {
1813
    return ch == '/' || ch == '\\';
1814
  };
1815
#else
1816
7
  const char* slash = "/";
1817
46
  auto is_slash = [] (char ch) {
1818
46
    return ch == '/';
1819
  };
1820

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1821
7
      context_.host.length() > 0) {
1822
1
    return "";
1823
  }
1824
#endif
1825
12
  std::string decoded_path;
1826
18
  for (const std::string& part : context_.path) {
1827
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1828
58
    for (char& ch : decoded) {
1829
46
      if (is_slash(ch)) {
1830
1
        return "";
1831
      }
1832
    }
1833
12
    decoded_path += slash + decoded;
1834
  }
1835
1836
#ifdef _WIN32
1837
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1838
1839
  // If hostname is set, then we have a UNC path. Pass the hostname through
1840
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1841
  // need to worry about percent encoding because the URL parser will have
1842
  // already taken care of that for us. Note that this only causes IDNs with an
1843
  // appropriate `xn--` prefix to be decoded.
1844
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1845
      context_.host.length() > 0) {
1846
    std::string unicode_host;
1847
    if (!ToUnicode(context_.host, &unicode_host)) {
1848
      return "";
1849
    }
1850
    return "\\\\" + unicode_host + decoded_path;
1851
  }
1852
  // Otherwise, it's a local path that requires a drive letter.
1853
  if (decoded_path.length() < 3) {
1854
    return "";
1855
  }
1856
  if (decoded_path[2] != ':' ||
1857
      !IsASCIIAlpha(decoded_path[1])) {
1858
    return "";
1859
  }
1860
  // Strip out the leading '\'.
1861
  return decoded_path.substr(1);
1862
#else
1863
5
  return decoded_path;
1864
#endif
1865
}
1866
1867
40354
// Builds a URL from a file system path.
//
// The result starts out as the URL parsed from "file://". Every '%' in
// `file_path` is rewritten as "%25"; the escaped text is then parsed into
// that URL's context with the parser started in the kPathStart state.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped_file_path;
  for (const char ch : file_path) {
    if (ch == '%') {
      escaped_file_path += "%25";
    } else {
      escaped_file_path += ch;
    }
  }

  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1879
1880
// This function works by calling out to a JS function that creates and
1881
// returns the JS URL object. Be mindful of the JS<->Native boundary
1882
// crossing that is required.
1883
// Creates the JS representation of this URL by calling the environment's
// URL constructor function with the arguments produced by SetArgs().
//
// Returns an empty handle when this URL carries URL_FLAGS_FAILED; otherwise
// returns whatever the constructor call yields.
MaybeLocal<Value> URL::ToObject(Environment* env) const {
  Isolate* isolate = env->isolate();
  Local<Context> context = env->context();
  Context::Scope context_scope(context);

  if (context_.flags & URL_FLAGS_FAILED) {
    return MaybeLocal<Value>();
  }

  const Local<Value> undef = Undefined(isolate);
  const Local<Value> null = Null(isolate);

  Local<Value> argv[] = {
    undef,
    undef,
    undef,
    undef,
    null,  // host defaults to null
    null,  // port defaults to null
    undef,
    null,  // query defaults to null
    null,  // fragment defaults to null
  };
  SetArgs(env, argv, context_);

  // The call runs under a TryCatchScope in kFatal mode.
  TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);

  // The SetURLConstructor method must have been called already to
  // set the constructor function used below. SetURLConstructor is
  // called automatically when the internal/url.js module is loaded
  // during the internal/bootstrap/node.js processing.
  return env->url_constructor_function()
      ->Call(context, undef, arraysize(argv), argv);
}
1921
1922
}  // namespace url
1923
}  // namespace node
1924
1925
5007
// Associates node::url::Initialize with the module name `url`. The macro is
// defined outside this file; presumably this is what makes the binding
// loadable as an internal module -- confirm against the macro definition.
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1926
4946
// Associates node::url::RegisterExternalReferences with the module name
// `url`. The macro is defined outside this file; presumably it hooks the
// function into the external reference registration step -- confirm against
// the macro definition.
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)