GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1148 1207 95.1 %
Date: 2021-09-24 04:12:43 Branches: 988 1120 88.2 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <string>
11
#include <vector>
12
13
namespace node {
14
15
using errors::TryCatchScope;
16
17
using url::table_data::hex;
18
using url::table_data::C0_CONTROL_ENCODE_SET;
19
using url::table_data::FRAGMENT_ENCODE_SET;
20
using url::table_data::PATH_ENCODE_SET;
21
using url::table_data::USERINFO_ENCODE_SET;
22
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
23
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
24
25
using v8::Array;
26
using v8::Context;
27
using v8::Function;
28
using v8::FunctionCallbackInfo;
29
using v8::HandleScope;
30
using v8::Int32;
31
using v8::Integer;
32
using v8::Isolate;
33
using v8::Local;
34
using v8::MaybeLocal;
35
using v8::NewStringType;
36
using v8::Null;
37
using v8::Object;
38
using v8::String;
39
using v8::Undefined;
40
using v8::Value;
41
42
128773
// Builds a V8 string from the UTF-8 bytes in `str` (str.length() bytes
// starting at str.data()) and unwraps the result with ToLocalChecked().
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
48
49
namespace url {
50
namespace {
51
52
// https://url.spec.whatwg.org/#eof-code-point
53
// Sentinel used in place of a real character once a read pointer has reached
// the end of its input (ParseIPv4Host() substitutes it when pointer == end).
constexpr char kEOL = -1;
54
55
// Used in ToUSVString().
56
// The code point U+FFFD expressed as a single UTF-16 code unit.
constexpr char16_t kUnicodeReplacementCharacter = 0xFFFD;
57
58
// https://url.spec.whatwg.org/#concept-host
// Result holder for host parsing. An instance starts out as H_FAILED; the
// Parse*() members move it to another host type when they succeed.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // Reports whether the host is (still) in the H_FAILED state.
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage shared by all host types; `type_` records which member is live.
  // The constructor and destructor are user-provided because the string
  // member is not trivially constructible/destructible.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the live one and returns the host to
  // the H_FAILED state.
  void Reset() {
    using string = std::string;
    const bool holds_string =
        type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
    if (holds_string)
      value_.domain_or_opaque.~string();
    type_ = HostType::H_FAILED;
  }

  // Plain assignment to the string member of the union would be fragile: it
  // is only valid if a string object is already alive there. For a long time
  // that happened to work because ParseIPv6Host() zero-filled `value_`, which
  // leans far too heavily on standard library internals.
  // The setters below therefore Reset() first and then construct the string
  // in place. Not keeping strings inside a union at all would be cleaner.
  void SetString(HostType type, std::string&& contents) {
    Reset();
    type_ = type;
    new(&value_.domain_or_opaque) std::string(std::move(contents));
  }

  void SetOpaque(std::string&& string) {
    SetString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    SetString(HostType::H_DOMAIN, std::move(string));
  }
};
130
131
4572
// Reset() destroys the union's string member when that member is the live
// one, so nothing else needs to happen here.
URLHost::~URLHost() { Reset(); }
134
135
// X-macro listing the enumerators of `url_cb_args`, in order. ARG_COUNT must
// stay last so that its value equals the number of entries before it.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)                                                            \
  XX(ARG_COUNT)  // This one has to be last.

// X-macro listing the enumerators of `url_error_cb_args`, in order.
// The final entry deliberately has no trailing line continuation, so the
// macro body cannot swallow whatever line happens to follow it.
#define ERR_ARGS(XX)                                                          \
  XX(ERR_ARG_FLAGS)                                                           \
  XX(ERR_ARG_INPUT)

// Enumerators generated from ARGS; values start at 0 and increase by one.
enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};

// Enumerators generated from ERR_ARGS; values start at 0.
enum url_error_cb_args {
#define XX(name) name,
  ERR_ARGS(XX)
#undef XX
};
162
163
// Generates `template <typename T> constexpr bool name(const T ch)`, which
// evaluates `expr` against `ch`. `bits` is the minimum width, in bits, that
// the character type T must have; narrower types fail to compile.
#define CHAR_TEST(bits, name, expr)                                           \
  template <typename T>                                                       \
  constexpr bool name(const T ch) {                                           \
    static_assert(sizeof(ch) >= (bits) / 8,                                   \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }

// Like CHAR_TEST, but for a predicate over two characters (`ch1`, `ch2`).
// Additionally generates a std::basic_string overload that applies the
// predicate to the first two characters and is false for shorter strings.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  constexpr bool name(const T ch1, const T ch2) {                             \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  bool name(const std::basic_string<T>& str) {                                \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be at least " #bits " bits wide");          \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               (ch >= 'A' && ch <= 'F') ||
                               (ch >= 'a' && ch <= 'f')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
                            (ch >= 'a' && ch <= 'z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))

// https://infra.spec.whatwg.org/#ascii-lowercase
// ASCII letters get bit 0x20 set (which maps 'A'-'Z' onto 'a'-'z'); every
// other value is returned unchanged.
template <typename T>
constexpr T ASCIILowercase(T ch) {
  return IsASCIIAlpha(ch) ? (ch | 0x20) : ch;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
          ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
          ch == '^' || ch == '|')

// https://url.spec.whatwg.org/#windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && ch2 == ':'))

// If a UTF-16 character is a low/trailing surrogate.
CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00)

// If a UTF-16 character is a surrogate.
CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800)

// If a UTF-16 surrogate is a low/trailing one.
CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)

#undef CHAR_TEST
#undef TWO_CHAR_STRING_TEST
239
240
241
10834906
// Reports whether bit `i` of the bitmap `a` is set. Bits are numbered from the
// least significant bit of a[0], eight per array element.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i / 8];
  const unsigned mask = 1u << (i % 8);
  return (byte & mask) != 0;
}
244
245
// Appends ch to str. If ch position in encode_set is set, the ch will
246
// be percent-encoded then appended.
247
10834906
void AppendOrEscape(std::string* str,
248
                    const unsigned char ch,
249
                    const uint8_t encode_set[]) {
250
10834906
  if (BitAt(encode_set, ch))
251
1920
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
252
  else
253
10832986
    *str += ch;
254
10834906
}
255
256
// Maps one ASCII hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its value 0..15.
// Any other input yields static_cast<unsigned>(-1).
template <typename T>
unsigned hex2bin(const T ch) {
  unsigned value = static_cast<unsigned>(-1);
  if (ch >= '0' && ch <= '9') {
    value = ch - '0';
  } else if (ch >= 'A' && ch <= 'F') {
    value = 10 + (ch - 'A');
  } else if (ch >= 'a' && ch <= 'f') {
    value = 10 + (ch - 'a');
  }
  return value;
}
266
267
3920
std::string PercentDecode(const char* input, size_t len) {
268
3920
  std::string dest;
269
3920
  if (len == 0)
270
2
    return dest;
271
3918
  dest.reserve(len);
272
3918
  const char* pointer = input;
273
3918
  const char* end = input + len;
274
275
88914
  while (pointer < end) {
276
84996
    const char ch = pointer[0];
277
84996
    size_t remaining = end - pointer - 1;
278


85433
    if (ch != '%' || remaining < 2 ||
279
437
        (ch == '%' &&
280
437
         (!IsASCIIHexDigit(pointer[1]) ||
281
433
          !IsASCIIHexDigit(pointer[2])))) {
282
84571
      dest += ch;
283
84571
      pointer++;
284
84571
      continue;
285
    } else {
286
425
      unsigned a = hex2bin(pointer[1]);
287
425
      unsigned b = hex2bin(pointer[2]);
288
425
      char c = static_cast<char>(a * 16 + b);
289
425
      dest += c;
290
425
      pointer += 3;
291
    }
292
  }
293
3918
  return dest;
294
}
295
296
// X-macro table of the special schemes. Each row is
// XX(identifier, port, serialized scheme including the trailing ':').
#define SPECIALS(XX)                                                          \
  XX(ftp, 21, "ftp:")                                                         \
  XX(file, -1, "file:")                                                       \
  XX(http, 80, "http:")                                                       \
  XX(https, 443, "https:")                                                    \
  XX(ws, 80, "ws:")                                                           \
  XX(wss, 443, "wss:")

// Reports whether `scheme` is exactly one of the serialized schemes listed in
// SPECIALS (for example "http:", colon included).
bool IsSpecial(const std::string& scheme) {
#define MATCH_SPECIAL(unused_key, unused_port, serialized)                    \
  if (scheme == serialized) return true;
  SPECIALS(MATCH_SPECIAL)
#undef MATCH_SPECIAL
  return false;
}
310
311
125618
// Returns env->url_special_<key>_string() for the SPECIALS row whose
// serialized scheme equals `scheme`. Reaching the end without a match hits
// UNREACHABLE(), so callers must pass a special scheme.
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define RETURN_SPECIAL(key, unused_port, serialized)                          \
  if (scheme == serialized) return env->url_special_##key##_string();
  SPECIALS(RETURN_SPECIAL)
#undef RETURN_SPECIAL
  UNREACHABLE();
}
318
319
121345
int NormalizePort(const std::string& scheme, int p) {
320
#define V(_, port, name) if (scheme == name && p == port) return -1;
321









121345
  SPECIALS(V);
322
#undef V
323
8908
  return p;
324
}
325
326
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
327
4415
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
328
4415
  size_t length = end - p;
329
4043
  return length >= 2 &&
330

8494
    IsWindowsDriveLetter(p[0], p[1]) &&
331
36
    (length == 2 ||
332
36
      p[2] == '/' ||
333
14
      p[2] == '\\' ||
334
6
      p[2] == '?' ||
335
4419
      p[2] == '#');
336
}
337
338
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs `input` through i18n::ToUnicode() and stores the result in `*output`.
// Returns false, leaving `*output` untouched, if the conversion reports a
// negative result.
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool ok =
      i18n::ToUnicode(&converted, input.c_str(), input.length()) >= 0;
  if (ok)
    output->assign(*converted, converted.length());
  return ok;
}

// Runs `input` through i18n::ToASCII() and stores the result in `*output`.
// Returns false, leaving `*output` untouched, if the conversion reports a
// negative result or produces an empty string.
bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  if (i18n::ToASCII(&converted, input.c_str(), input.length()) < 0 ||
      converted.length() == 0) {
    return false;
  }
  output->assign(*converted, converted.length());
  return true;
}
#else
// Intentional non-ops if ICU is not present: both functions copy `input` to
// `*output` verbatim and report success.
bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
#endif
368
369
#define NS_IN6ADDRSZ 16
370
371
137
void URLHost::ParseIPv6Host(const char* input, size_t length) {
372
137
  CHECK_EQ(type_, HostType::H_FAILED);
373
374
  unsigned char buf[sizeof(struct in6_addr)];
375
137
  MaybeStackBuffer<char> ipv6(length + 1);
376
137
  *(*ipv6 + length) = 0;
377
137
  memset(buf, 0, sizeof(buf));
378
137
  memcpy(*ipv6, input, sizeof(const char) * length);
379
380
137
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
381
382
137
  if (ret != 0) {
383
92
    return;
384
  }
385
386
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
387
405
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
388
360
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
389
  }
390
391
45
  type_ = HostType::H_IPV6;
392
}
393
394
3782
// Parses the text in [start, end) as one part of an IPv4 address.
//
// Radix selection:
//  - a "0x"/"0X" prefix selects hexadecimal (a bare prefix evaluates to 0);
//  - otherwise a leading '0' followed by more characters selects octal;
//  - otherwise the text is decimal. Empty text evaluates to 0.
//
// Returns the parsed value, or -1 if any character is not a digit of the
// selected radix. Values that do not fit into int64_t saturate at INT64_MAX.
//
// Only bytes inside [start, end) are read; the function does not depend on
// what the caller stores at or after `end`.
int64_t ParseNumber(const char* start, const char* end) {
  unsigned R = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    R = 16;
  }
  if (end - start == 0) {
    return 0;
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
    start++;
    R = 8;
  }

  const int64_t radix = static_cast<int64_t>(R);
  int64_t value = 0;
  bool overflowed = false;
  for (const char* p = start; p < end; p++) {
    const char ch = *p;
    unsigned digit;
    if (ch >= '0' && ch <= '9') {
      digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      digit = 10 + (ch - 'a');
    } else if (ch >= 'A' && ch <= 'F') {
      digit = 10 + (ch - 'A');
    } else {
      return -1;
    }
    if (digit >= R)
      return -1;

    // Keep validating after an overflow: an invalid character later in the
    // text must still produce -1 rather than the saturated value.
    if (overflowed)
      continue;
    if (value > (INT64_MAX - static_cast<int64_t>(digit)) / radix) {
      overflowed = true;
    } else {
      value = value * radix + static_cast<int64_t>(digit);
    }
  }
  return overflowed ? INT64_MAX : value;
}
428
429
3584
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
430
3584
  CHECK_EQ(type_, HostType::H_FAILED);
431
3584
  *is_ipv4 = false;
432
3584
  const char* pointer = input;
433
3584
  const char* mark = input;
434
3584
  const char* end = pointer + length;
435
3584
  int parts = 0;
436
3584
  uint32_t val = 0;
437
  uint64_t numbers[4];
438
3584
  int tooBigNumbers = 0;
439
3584
  if (length == 0)
440
3504
    return;
441
442
32849
  while (pointer <= end) {
443
32745
    const char ch = pointer < end ? pointer[0] : kEOL;
444
32745
    int64_t remaining = end - pointer - 1;
445

32745
    if (ch == '.' || ch == kEOL) {
446
3798
      if (++parts > static_cast<int>(arraysize(numbers)))
447
4
        return;
448
3794
      if (pointer == mark)
449
12
        return;
450
3782
      int64_t n = ParseNumber(mark, pointer);
451
3782
      if (n < 0)
452
3460
        return;
453
454
322
      if (n > 255) {
455
112
        tooBigNumbers++;
456
      }
457
322
      numbers[parts - 1] = n;
458
322
      mark = pointer + 1;
459

322
      if (ch == '.' && remaining == 0)
460
4
        break;
461
    }
462
29265
    pointer++;
463
  }
464
108
  CHECK_GT(parts, 0);
465
108
  *is_ipv4 = true;
466
467
  // If any but the last item in numbers is greater than 255, return failure.
468
  // If the last item in numbers is greater than or equal to
469
  // 256^(5 - the number of items in numbers), return failure.
470
104
  if (tooBigNumbers > 1 ||
471

272
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
472
100
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
473
28
    return;
474
  }
475
476
80
  type_ = HostType::H_IPV4;
477
80
  val = static_cast<uint32_t>(numbers[parts - 1]);
478
196
  for (int n = 0; n < parts - 1; n++) {
479
116
    double b = 3 - n;
480
116
    val +=
481
116
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
482
  }
483
484
80
  value_.ipv4 = val;
485
}
486
487
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
488
520
  CHECK_EQ(type_, HostType::H_FAILED);
489
520
  std::string output;
490
520
  output.reserve(length);
491
3053
  for (size_t i = 0; i < length; i++) {
492
2595
    const char ch = input[i];
493

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
494
62
      return;
495
    } else {
496
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
497
    }
498
  }
499
500
458
  SetOpaque(std::move(output));
501
}
502
503
4572
void URLHost::ParseHost(const char* input,
504
                        size_t length,
505
                        bool is_special,
506
                        bool unicode) {
507
4572
  CHECK_EQ(type_, HostType::H_FAILED);
508
4572
  const char* pointer = input;
509
510
4572
  if (length == 0)
511
1096
    return;
512
513
4572
  if (pointer[0] == '[') {
514
145
    if (pointer[length - 1] != ']')
515
8
      return;
516
137
    return ParseIPv6Host(++pointer, length - 2);
517
  }
518
519
4427
  if (!is_special)
520
520
    return ParseOpaqueHost(input, length);
521
522
  // First, we have to percent decode
523
3907
  std::string decoded = PercentDecode(input, length);
524
525
  // Then we have to punycode toASCII
526
3907
  if (!ToASCII(decoded, &decoded))
527
148
    return;
528
529
  // If any of the following characters are still present, we have to fail
530
87461
  for (size_t n = 0; n < decoded.size(); n++) {
531
83877
    const char ch = decoded[n];
532
83877
    if (IsForbiddenHostCodePoint(ch)) {
533
175
      return;
534
    }
535
  }
536
537
  // Check to see if it's an IPv4 IP address
538
  bool is_ipv4;
539
3584
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
540
3584
  if (is_ipv4)
541
108
    return;
542
543
  // If the unicode flag is set, run the result through punycode ToUnicode
544

3476
  if (unicode && !ToUnicode(decoded, &decoded))
545
    return;
546
547
  // It's not an IPv4 or IPv6 address, it must be a domain
548
3476
  SetDomain(std::move(decoded));
549
}
550
551
// Finds the longest run of zero-valued elements in values[0..len) so that an
// IPv6 address can be serialized with "::" compression.
// Returns a pointer to the first element of that run, or nullptr when no run
// is at least two elements long. Among equally long runs the earliest wins.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best_start = nullptr;
  unsigned best_length = 1;  // A run has to beat this, i.e. be >= 2 long.

  T* run_start = nullptr;
  unsigned run_length = 0;

  for (size_t i = 0; i < len; i++) {
    if (values[i] == 0) {
      if (run_start == nullptr)
        run_start = values + i;
      run_length++;
      continue;
    }
    // A non-zero element closes the current run (if any).
    if (run_length > best_length) {
      best_length = run_length;
      best_start = run_start;
    }
    run_length = 0;
    run_start = nullptr;
  }

  // The input may end in the middle of a run.
  if (run_length > best_length)
    best_start = run_start;
  return best_start;
}
581
582
4059
std::string URLHost::ToStringMove() {
583
4059
  std::string return_value;
584
4059
  switch (type_) {
585
3934
    case HostType::H_DOMAIN:
586
    case HostType::H_OPAQUE:
587
3934
      return_value = std::move(value_.domain_or_opaque);
588
3934
      break;
589
125
    default:
590
125
      return_value = ToString();
591
125
      break;
592
  }
593
4059
  Reset();
594
4059
  return return_value;
595
}
596
597
125
std::string URLHost::ToString() const {
598
250
  std::string dest;
599

125
  switch (type_) {
600
    case HostType::H_DOMAIN:
601
    case HostType::H_OPAQUE:
602
      return value_.domain_or_opaque;
603
80
    case HostType::H_IPV4: {
604
80
      dest.reserve(15);
605
80
      uint32_t value = value_.ipv4;
606
400
      for (int n = 0; n < 4; n++) {
607
        char buf[4];
608
320
        snprintf(buf, sizeof(buf), "%d", value % 256);
609
320
        dest.insert(0, buf);
610
320
        if (n < 3)
611
240
          dest.insert(0, 1, '.');
612
320
        value /= 256;
613
      }
614
80
      break;
615
    }
616
45
    case HostType::H_IPV6: {
617
45
      dest.reserve(41);
618
45
      dest += '[';
619
45
      const uint16_t* start = &value_.ipv6[0];
620
      const uint16_t* compress_pointer =
621
45
          FindLongestZeroSequence(start, 8);
622
45
      bool ignore0 = false;
623
405
      for (int n = 0; n <= 7; n++) {
624
360
        const uint16_t* piece = &value_.ipv6[n];
625

360
        if (ignore0 && *piece == 0)
626
245
          continue;
627
156
        else if (ignore0)
628
35
          ignore0 = false;
629
156
        if (compress_pointer == piece) {
630
41
          dest += n == 0 ? "::" : ":";
631
41
          ignore0 = true;
632
41
          continue;
633
        }
634
        char buf[5];
635
115
        snprintf(buf, sizeof(buf), "%x", *piece);
636
115
        dest += buf;
637
115
        if (n < 7)
638
76
          dest += ':';
639
      }
640
45
      dest += ']';
641
45
      break;
642
    }
643
    case HostType::H_FAILED:
644
      break;
645
  }
646
125
  return dest;
647
}
648
649
4230
bool ParseHost(const std::string& input,
650
               std::string* output,
651
               bool is_special,
652
               bool unicode = false) {
653
4230
  if (input.empty()) {
654
94
    output->clear();
655
94
    return true;
656
  }
657
8272
  URLHost host;
658
4136
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
659
4136
  if (host.ParsingFailed())
660
489
    return false;
661
3647
  *output = host.ToStringMove();
662
3647
  return true;
663
}
664
665
6449
std::vector<std::string> FromJSStringArray(Environment* env,
666
                                           Local<Array> array) {
667
6449
  std::vector<std::string> vec;
668
6449
  if (array->Length() > 0)
669
6433
    vec.reserve(array->Length());
670
89474
  for (size_t n = 0; n < array->Length(); n++) {
671
76576
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
672
76576
    if (val->IsString()) {
673
38288
      Utf8Value value(env->isolate(), val.As<String>());
674
38288
      vec.emplace_back(*value, value.length());
675
    }
676
  }
677
6449
  return vec;
678
}
679
680
6449
// Builds a url_data from the properties of the JS object `base_obj`.
//   - flags, port: copied when the property is an Int32.
//   - scheme: always converted with Utf8Value.
//   - username, password: copied when the property is a string; the matching
//     URL_FLAGS_HAS_* bit is set only for a non-empty string.
//   - host, query, fragment: copied when the property is a string; the
//     matching URL_FLAGS_HAS_* bit is set even for an empty string.
//   - path: when the property is an array, its string elements are copied
//     and URL_FLAGS_HAS_PATH is set.
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  // Reads one property of `base_obj`.
  const auto Property = [&](Local<String> key) -> Local<Value> {
    return base_obj->Get(env->context(), key).ToLocalChecked();
  };

  Local<Value> flags = Property(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = Property(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  Local<Value> scheme = Property(env->scheme_string());
  base.scheme = Utf8Value(env->isolate(), scheme).out();

  // Copies string property `name` into `base.*member`. `flag` is OR-ed into
  // base.flags when the string is non-empty, or unconditionally when
  // `empty_as_present` is true.
  const auto GetStr = [&](std::string url_data::*member,
                          int flag,
                          Local<String> name,
                          bool empty_as_present) {
    Local<Value> value = Property(name);
    if (!value->IsString())
      return;
    Local<String> js_string = value.As<String>();
    Utf8Value utf8value(env->isolate(), js_string);
    (base.*member).assign(*utf8value, utf8value.length());
    if (empty_as_present || js_string->Length() != 0)
      base.flags |= flag;
  };

  GetStr(&url_data::username,
         URL_FLAGS_HAS_USERNAME,
         env->username_string(),
         false);
  GetStr(&url_data::password,
         URL_FLAGS_HAS_PASSWORD,
         env->password_string(),
         false);
  GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
  GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
  GetStr(&url_data::fragment,
         URL_FLAGS_HAS_FRAGMENT,
         env->fragment_string(),
         true);

  Local<Value> path = Property(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
734
735
39987
// Builds a url_data from the properties of the JS object `context_obj`.
//   - flags: when the property is an Int32, only the bits in kCopyFlagsMask
//     are taken over.
//   - scheme, host: copied when the property is a string.
//   - port: copied when the property is an Int32.
//   - username / password: read only when the corresponding URL_FLAGS_HAS_*
//     bit was copied; the property is then CHECKed to be a string.
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;

  // Reads one property of `context_obj`.
  const auto Property = [&](Local<String> key) -> Local<Value> {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  // Converts `value` to UTF-8 and stores it in `*target`.
  const auto AssignUtf8 = [&](std::string* target, Local<Value> value) {
    Utf8Value utf8(env->isolate(), value);
    target->assign(*utf8, utf8.length());
  };

  Local<Value> flags = Property(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = Property(env->scheme_string());
  if (scheme->IsString())
    AssignUtf8(&context.scheme, scheme);

  Local<Value> port = Property(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = Property(env->username_string());
    CHECK(username->IsString());
    AssignUtf8(&context.username, username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = Property(env->password_string());
    CHECK(password->IsString());
    AssignUtf8(&context.password, password);
  }

  Local<Value> host = Property(env->host_string());
  if (host->IsString())
    AssignUtf8(&context.host, host);

  return context;
}
783
784
// A single-dot path segment is exactly one of ".", "%2e" or "%2E".
bool IsSingleDotSegment(const std::string& str) {
  if (str.size() == 1)
    return str[0] == '.';
  if (str.size() == 3) {
    const bool is_e = str[2] == 'e' || str[2] == 'E';
    return str[0] == '%' && str[1] == '2' && is_e;
  }
  return false;
}
797
798
// A double-dot path segment is two dots, where each dot may independently be
// written as ".", "%2e" or "%2E":
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
bool IsDoubleDotSegment(const std::string& str) {
  // True when the three characters starting at `pos` spell "%2e" or "%2E".
  // Only called with pos + 2 inside the string.
  const auto encoded_dot_at = [&str](size_t pos) {
    return str[pos] == '%' &&
           str[pos + 1] == '2' &&
           (str[pos + 2] == 'e' || str[pos + 2] == 'E');
  };

  const size_t size = str.size();
  if (size == 2)
    return str[0] == '.' && str[1] == '.';
  if (size == 4) {
    return (str[0] == '.' && encoded_dot_at(1)) ||
           (encoded_dot_at(0) && str[3] == '.');
  }
  if (size == 6)
    return encoded_dot_at(0) && encoded_dot_at(3);
  return false;
}
827
828
7781
void ShortenUrlPath(struct url_data* url) {
829
7781
  if (url->path.empty()) return;
830


7901
  if (url->path.size() == 1 && url->scheme == "file:" &&
831
442
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
832
7459
  url->path.pop_back();
833
}
834
835
}  // anonymous namespace
836
837
206405
void URL::Parse(const char* input,
838
                size_t len,
839
                enum url_parse_state state_override,
840
                struct url_data* url,
841
                bool has_url,
842
                const struct url_data* base,
843
                bool has_base) {
844
206405
  const char* p = input;
845
206405
  const char* end = input + len;
846
847
206405
  if (!has_url) {
848
132677
    for (const char* ptr = p; ptr < end; ptr++) {
849
132658
      if (IsC0ControlOrSpace(*ptr))
850
56
        p++;
851
      else
852
132602
        break;
853
    }
854
132669
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
855
132650
      if (IsC0ControlOrSpace(*ptr))
856
48
        end--;
857
      else
858
132602
        break;
859
    }
860
132621
    input = p;
861
132621
    len = end - p;
862
  }
863
864
  // The spec says we should strip out any ASCII tabs or newlines.
865
  // In those cases, we create another std::string instance with the filtered
866
  // contents, but in the general case we avoid the overhead.
867
206405
  std::string whitespace_stripped;
868
13073378
  for (const char* ptr = p; ptr < end; ptr++) {
869
12867143
    if (!IsASCIITabOrNewline(*ptr))
870
12866973
      continue;
871
    // Hit tab or newline. Allocate storage, copy what we have until now,
872
    // and then iterate and filter all similar characters out.
873
170
    whitespace_stripped.reserve(len - 1);
874
170
    whitespace_stripped.assign(p, ptr - p);
875
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
876
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
877
853
      if (!IsASCIITabOrNewline(*ptr))
878
769
        whitespace_stripped += *ptr;
879
    }
880
881
    // Update variables like they should have looked like if the string
882
    // had been stripped of whitespace to begin with.
883
170
    input = whitespace_stripped.c_str();
884
170
    len = whitespace_stripped.size();
885
170
    p = input;
886
170
    end = input + len;
887
170
    break;
888
  }
889
890
206405
  bool atflag = false;  // Set when @ has been seen.
891
206405
  bool square_bracket_flag = false;  // Set inside of [...]
892
206405
  bool password_token_seen_flag = false;  // Set after a : after an username.
893
894
206405
  std::string buffer;
895
896
  // Set the initial parse state.
897
206405
  const bool has_state_override = state_override != kUnknownState;
898
206405
  enum url_parse_state state = has_state_override ? state_override :
899
                                                    kSchemeStart;
900
901

206405
  if (state < kSchemeStart || state > kFragment) {
902
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
903
    return;
904
  }
905
906
13534852
  while (p <= end) {
907
13336906
    const char ch = p < end ? p[0] : kEOL;
908
13336906
    bool special = (url->flags & URL_FLAGS_SPECIAL);
909
    bool cannot_be_base;
910

13336906
    bool special_back_slash = (special && ch == '\\');
911
912





13336906
    switch (state) {
913
132698
      case kSchemeStart:
914
132698
        if (IsASCIIAlpha(ch)) {
915
121658
          buffer += ASCIILowercase(ch);
916
121658
          state = kScheme;
917
11040
        } else if (!has_state_override) {
918
11030
          state = kNoScheme;
919
11030
          continue;
920
        } else {
921
10
          url->flags |= URL_FLAGS_FAILED;
922
10
          return;
923
        }
924
121658
        break;
925
499546
      case kScheme:
926


499546
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
927
377888
          buffer += ASCIILowercase(ch);
928

121658
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
929

120406
          if (has_state_override && buffer.size() == 0) {
930
            url->flags |= URL_FLAGS_TERMINATED;
931
            return;
932
          }
933
120406
          buffer += ':';
934
935
120406
          bool new_is_special = IsSpecial(buffer);
936
937
120406
          if (has_state_override) {
938
39
            if ((special != new_is_special) ||
939
39
                ((buffer == "file:") &&
940
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
941
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
942


104
                  (url->port != -1))) ||
943
39
                  (url->scheme == "file:" && url->host.empty())) {
944
32
              url->flags |= URL_FLAGS_TERMINATED;
945
32
              return;
946
            }
947
          }
948
949
120374
          url->scheme = std::move(buffer);
950
120374
          url->port = NormalizePort(url->scheme, url->port);
951
120374
          if (new_is_special) {
952
115626
            url->flags |= URL_FLAGS_SPECIAL;
953
115626
            special = true;
954
          } else {
955
4748
            url->flags &= ~URL_FLAGS_SPECIAL;
956
4748
            special = false;
957
          }
958

120374
          special_back_slash = (special && ch == '\\');
959
120374
          buffer.clear();
960
120374
          if (has_state_override)
961
27
            return;
962
120347
          if (url->scheme == "file:") {
963
112388
            state = kFile;
964
3227
          } else if (special &&
965

11186
                     has_base &&
966
1027
                     url->scheme == base->scheme) {
967
317
            state = kSpecialRelativeOrAuthority;
968
7642
          } else if (special) {
969
2910
            state = kSpecialAuthoritySlashes;
970

4732
          } else if (p + 1 < end && p[1] == '/') {
971
716
            state = kPathOrAuthority;
972
716
            p++;
973
          } else {
974
4016
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
975
4016
            url->flags |= URL_FLAGS_HAS_PATH;
976
4016
            url->path.emplace_back("");
977
4016
            state = kCannotBeBase;
978
120347
          }
979
1252
        } else if (!has_state_override) {
980
1244
          buffer.clear();
981
1244
          state = kNoScheme;
982
1244
          p = input;
983
1244
          continue;
984
        } else {
985
8
          url->flags |= URL_FLAGS_FAILED;
986
8
          return;
987
        }
988
498235
        break;
989
12274
      case kNoScheme:
990

12274
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
991

12274
        if (!has_base || (cannot_be_base && ch != '#')) {
992
7487
          url->flags |= URL_FLAGS_FAILED;
993
7487
          return;
994

4787
        } else if (cannot_be_base && ch == '#') {
995
28
          url->scheme = base->scheme;
996
28
          if (IsSpecial(url->scheme)) {
997
            url->flags |= URL_FLAGS_SPECIAL;
998
            special = true;
999
          } else {
1000
28
            url->flags &= ~URL_FLAGS_SPECIAL;
1001
28
            special = false;
1002
          }
1003

28
          special_back_slash = (special && ch == '\\');
1004
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
1005
28
            url->flags |= URL_FLAGS_HAS_PATH;
1006
28
            url->path = base->path;
1007
          }
1008
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
1009
4
            url->flags |= URL_FLAGS_HAS_QUERY;
1010
4
            url->query = base->query;
1011
          }
1012
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1013
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1014
            url->fragment = base->fragment;
1015
          }
1016
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1017
28
          state = kFragment;
1018

9518
        } else if (has_base &&
1019
4759
                   base->scheme != "file:") {
1020
323
          state = kRelative;
1021
323
          continue;
1022
        } else {
1023
4436
          url->scheme = "file:";
1024
4436
          url->flags |= URL_FLAGS_SPECIAL;
1025
4436
          special = true;
1026
4436
          state = kFile;
1027

4436
          special_back_slash = (special && ch == '\\');
1028
4436
          continue;
1029
        }
1030
28
        break;
1031
317
      case kSpecialRelativeOrAuthority:
1032

317
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1033
285
          state = kSpecialAuthorityIgnoreSlashes;
1034
285
          p++;
1035
        } else {
1036
32
          state = kRelative;
1037
32
          continue;
1038
        }
1039
285
        break;
1040
716
      case kPathOrAuthority:
1041
716
        if (ch == '/') {
1042
548
          state = kAuthority;
1043
        } else {
1044
168
          state = kPath;
1045
168
          continue;
1046
        }
1047
548
        break;
1048
355
      case kRelative:
1049
355
        url->scheme = base->scheme;
1050
355
        if (IsSpecial(url->scheme)) {
1051
255
          url->flags |= URL_FLAGS_SPECIAL;
1052
255
          special = true;
1053
        } else {
1054
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1055
100
          special = false;
1056
        }
1057

355
        special_back_slash = (special && ch == '\\');
1058

355
        switch (ch) {
1059
18
          case kEOL:
1060
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1061
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1062
4
              url->username = base->username;
1063
            }
1064
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1065
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1066
4
              url->password = base->password;
1067
            }
1068
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1069
16
              url->flags |= URL_FLAGS_HAS_HOST;
1070
16
              url->host = base->host;
1071
            }
1072
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1073
              url->flags |= URL_FLAGS_HAS_QUERY;
1074
              url->query = base->query;
1075
            }
1076
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1077
18
              url->flags |= URL_FLAGS_HAS_PATH;
1078
18
              url->path = base->path;
1079
            }
1080
18
            url->port = base->port;
1081
18
            break;
1082
76
          case '/':
1083
76
            state = kRelativeSlash;
1084
76
            break;
1085
38
          case '?':
1086
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1087
              url->flags |= URL_FLAGS_HAS_USERNAME;
1088
              url->username = base->username;
1089
            }
1090
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1091
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1092
              url->password = base->password;
1093
            }
1094
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1095
34
              url->flags |= URL_FLAGS_HAS_HOST;
1096
34
              url->host = base->host;
1097
            }
1098
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1099
38
              url->flags |= URL_FLAGS_HAS_PATH;
1100
38
              url->path = base->path;
1101
            }
1102
38
            url->port = base->port;
1103
38
            state = kQuery;
1104
38
            break;
1105
38
          case '#':
1106
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1107
              url->flags |= URL_FLAGS_HAS_USERNAME;
1108
              url->username = base->username;
1109
            }
1110
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1111
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1112
              url->password = base->password;
1113
            }
1114
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1115
34
              url->flags |= URL_FLAGS_HAS_HOST;
1116
34
              url->host = base->host;
1117
            }
1118
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1119
              url->flags |= URL_FLAGS_HAS_QUERY;
1120
              url->query = base->query;
1121
            }
1122
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1123
38
              url->flags |= URL_FLAGS_HAS_PATH;
1124
38
              url->path = base->path;
1125
            }
1126
38
            url->port = base->port;
1127
38
            state = kFragment;
1128
38
            break;
1129
185
          default:
1130
185
            if (special_back_slash) {
1131
18
              state = kRelativeSlash;
1132
            } else {
1133
167
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1134
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1135
1
                url->username = base->username;
1136
              }
1137
167
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1138
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1139
1
                url->password = base->password;
1140
              }
1141
167
              if (base->flags & URL_FLAGS_HAS_HOST) {
1142
147
                url->flags |= URL_FLAGS_HAS_HOST;
1143
147
                url->host = base->host;
1144
              }
1145
167
              if (base->flags & URL_FLAGS_HAS_PATH) {
1146
167
                url->flags |= URL_FLAGS_HAS_PATH;
1147
167
                url->path = base->path;
1148
167
                ShortenUrlPath(url);
1149
              }
1150
167
              url->port = base->port;
1151
167
              state = kPath;
1152
167
              continue;
1153
            }
1154
        }
1155
188
        break;
1156
94
      case kRelativeSlash:
1157


94
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1158
22
          state = kSpecialAuthorityIgnoreSlashes;
1159
72
        } else if (ch == '/') {
1160
6
          state = kAuthority;
1161
        } else {
1162
66
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1163
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1164
8
            url->username = base->username;
1165
          }
1166
66
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1167
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1168
4
            url->password = base->password;
1169
          }
1170
66
          if (base->flags & URL_FLAGS_HAS_HOST) {
1171
58
            url->flags |= URL_FLAGS_HAS_HOST;
1172
58
            url->host = base->host;
1173
          }
1174
66
          url->port = base->port;
1175
66
          state = kPath;
1176
66
          continue;
1177
        }
1178
28
        break;
1179
2910
      case kSpecialAuthoritySlashes:
1180
2910
        state = kSpecialAuthorityIgnoreSlashes;
1181

2910
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1182
2761
          p++;
1183
        } else {
1184
149
          continue;
1185
        }
1186
2761
        break;
1187
3294
      case kSpecialAuthorityIgnoreSlashes:
1188

3294
        if (ch != '/' && ch != '\\') {
1189
3217
          state = kAuthority;
1190
3217
          continue;
1191
        }
1192
77
        break;
1193
84421
      case kAuthority:
1194
84421
        if (ch == '@') {
1195
563
          if (atflag) {
1196
41
            buffer.reserve(buffer.size() + 3);
1197
41
            buffer.insert(0, "%40");
1198
          }
1199
563
          atflag = true;
1200
563
          size_t blen = buffer.size();
1201

563
          if (blen > 0 && buffer[0] != ':') {
1202
467
            url->flags |= URL_FLAGS_HAS_USERNAME;
1203
          }
1204
6632
          for (size_t n = 0; n < blen; n++) {
1205
6069
            const char bch = buffer[n];
1206
6069
            if (bch == ':') {
1207
442
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1208
442
              if (!password_token_seen_flag) {
1209
426
                password_token_seen_flag = true;
1210
426
                continue;
1211
              }
1212
            }
1213
5643
            if (password_token_seen_flag) {
1214
2714
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1215
            } else {
1216
2929
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1217
            }
1218
          }
1219
563
          buffer.clear();
1220

83858
        } else if (ch == kEOL ||
1221
80157
                   ch == '/' ||
1222
80125
                   ch == '?' ||
1223
80107
                   ch == '#' ||
1224
                   special_back_slash) {
1225

3771
          if (atflag && buffer.size() == 0) {
1226
52
            url->flags |= URL_FLAGS_FAILED;
1227
52
            return;
1228
          }
1229
3719
          p -= buffer.size() + 1;
1230
3719
          buffer.clear();
1231
3719
          state = kHost;
1232
        } else {
1233
80087
          buffer += ch;
1234
        }
1235
84369
        break;
1236
79272
      case kHost:
1237
      case kHostname:
1238

79272
        if (has_state_override && url->scheme == "file:") {
1239
12
          state = kFileHost;
1240
12
          continue;
1241

79260
        } else if (ch == ':' && !square_bracket_flag) {
1242
1017
          if (buffer.size() == 0) {
1243
24
            url->flags |= URL_FLAGS_FAILED;
1244
24
            return;
1245
          }
1246
993
          if (state_override == kHostname) {
1247
4
            return;
1248
          }
1249
989
          url->flags |= URL_FLAGS_HAS_HOST;
1250
989
          if (!ParseHost(buffer, &url->host, special)) {
1251
5
            url->flags |= URL_FLAGS_FAILED;
1252
5
            return;
1253
          }
1254
984
          buffer.clear();
1255
984
          state = kPort;
1256

78243
        } else if (ch == kEOL ||
1257
75257
                   ch == '/' ||
1258
75217
                   ch == '?' ||
1259
75191
                   ch == '#' ||
1260
                   special_back_slash) {
1261
3076
          p--;
1262

3076
          if (special && buffer.size() == 0) {
1263
21
            url->flags |= URL_FLAGS_FAILED;
1264
21
            return;
1265
          }
1266
325
          if (has_state_override &&
1267

3418
              buffer.size() == 0 &&
1268
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1269
38
               url->port != -1)) {
1270
8
            url->flags |= URL_FLAGS_TERMINATED;
1271
8
            return;
1272
          }
1273
3047
          url->flags |= URL_FLAGS_HAS_HOST;
1274
3047
          if (!ParseHost(buffer, &url->host, special)) {
1275
432
            url->flags |= URL_FLAGS_FAILED;
1276
432
            return;
1277
          }
1278
2615
          buffer.clear();
1279
2615
          state = kPathStart;
1280
2615
          if (has_state_override) {
1281
221
            return;
1282
          }
1283
        } else {
1284
75167
          if (ch == '[')
1285
139
            square_bracket_flag = true;
1286
75167
          if (ch == ']')
1287
135
            square_bracket_flag = false;
1288
75167
          buffer += ch;
1289
        }
1290
78545
        break;
1291
5515
      case kPort:
1292
5515
        if (IsASCIIDigit(ch)) {
1293
4468
          buffer += ch;
1294

1047
        } else if (has_state_override ||
1295
544
                   ch == kEOL ||
1296
36
                   ch == '/' ||
1297
36
                   ch == '?' ||
1298
36
                   ch == '#' ||
1299
                   special_back_slash) {
1300
1011
          if (buffer.size() > 0) {
1301
997
            unsigned port = 0;
1302
            // the condition port <= 0xffff prevents integer overflow
1303

5249
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1304
4252
              port = port * 10 + buffer[i] - '0';
1305
997
            if (port > 0xffff) {
1306
              // TODO(TimothyGu): This hack is currently needed for the host
1307
              // setter since it needs access to hostname if it is valid, and
1308
              // if the FAILED flag is set the entire response to JS layer
1309
              // will be empty.
1310
26
              if (state_override == kHost)
1311
2
                url->port = -1;
1312
              else
1313
24
                url->flags |= URL_FLAGS_FAILED;
1314
26
              return;
1315
            }
1316
            // the port is valid
1317
971
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1318
971
            if (url->port == -1)
1319
47
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1320
971
            buffer.clear();
1321
14
          } else if (has_state_override) {
1322
            // TODO(TimothyGu): Similar case as above.
1323
6
            if (state_override == kHost)
1324
2
              url->port = -1;
1325
            else
1326
4
              url->flags |= URL_FLAGS_TERMINATED;
1327
6
            return;
1328
          }
1329
979
          state = kPathStart;
1330
979
          continue;
1331
        } else {
1332
36
          url->flags |= URL_FLAGS_FAILED;
1333
36
          return;
1334
        }
1335
4468
        break;
1336
116824
      case kFile:
1337
116824
        url->scheme = "file:";
1338
116824
        url->host.clear();
1339
116824
        url->flags |= URL_FLAGS_HAS_HOST;
1340

116824
        if (ch == '/' || ch == '\\') {
1341
112506
          state = kFileSlash;
1342

4318
        } else if (has_base && base->scheme == "file:") {
1343

4299
          switch (ch) {
1344
4
            case kEOL:
1345
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1346
4
                url->host = base->host;
1347
              }
1348
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1349
4
                url->flags |= URL_FLAGS_HAS_PATH;
1350
4
                url->path = base->path;
1351
              }
1352
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1353
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1354
4
                url->query = base->query;
1355
              }
1356
4
              break;
1357
4
            case '?':
1358
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1359
4
                url->host = base->host;
1360
              }
1361
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1362
4
                url->flags |= URL_FLAGS_HAS_PATH;
1363
4
                url->path = base->path;
1364
              }
1365
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1366
4
              url->query.clear();
1367
4
              state = kQuery;
1368
4
              break;
1369
4
            case '#':
1370
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1371
4
                url->host = base->host;
1372
              }
1373
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1374
4
                url->flags |= URL_FLAGS_HAS_PATH;
1375
4
                url->path = base->path;
1376
              }
1377
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1378
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1379
4
                url->query = base->query;
1380
              }
1381
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1382
4
              url->fragment.clear();
1383
4
              state = kFragment;
1384
4
              break;
1385
4287
            default:
1386
4287
              url->query.clear();
1387
4287
              if (base->flags & URL_FLAGS_HAS_HOST) {
1388
4287
                url->host = base->host;
1389
              }
1390
4287
              if (base->flags & URL_FLAGS_HAS_PATH) {
1391
4287
                url->flags |= URL_FLAGS_HAS_PATH;
1392
4287
                url->path = base->path;
1393
              }
1394
4287
              if (!StartsWithWindowsDriveLetter(p, end)) {
1395
4263
                ShortenUrlPath(url);
1396
              } else {
1397
24
                url->path.clear();
1398
              }
1399
4287
              state = kPath;
1400
4287
              continue;
1401
          }
1402
        } else {
1403
19
          state = kPath;
1404
19
          continue;
1405
        }
1406
112518
        break;
1407
112506
      case kFileSlash:
1408

112506
        if (ch == '/' || ch == '\\') {
1409
112364
          state = kFileHost;
1410
        } else {
1411

142
          if (has_base && base->scheme == "file:") {
1412
128
            url->flags |= URL_FLAGS_HAS_HOST;
1413
128
            url->host = base->host;
1414

242
            if (!StartsWithWindowsDriveLetter(p, end) &&
1415
114
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1416
4
              url->flags |= URL_FLAGS_HAS_PATH;
1417
4
              url->path.push_back(base->path[0]);
1418
            }
1419
          }
1420
142
          state = kPath;
1421
142
          continue;
1422
        }
1423
112364
        break;
1424
113484
      case kFileHost:
1425

113484
        if (ch == kEOL ||
1426
1118
            ch == '/' ||
1427
1108
            ch == '\\' ||
1428
1108
            ch == '?' ||
1429
            ch == '#') {
1430
112364
          if (!has_state_override &&
1431

224740
              buffer.size() == 2 &&
1432
22
              IsWindowsDriveLetter(buffer)) {
1433
12
            state = kPath;
1434
112364
          } else if (buffer.size() == 0) {
1435
112170
            url->flags |= URL_FLAGS_HAS_HOST;
1436
112170
            url->host.clear();
1437
112170
            if (has_state_override)
1438
4
              return;
1439
112166
            state = kPathStart;
1440
          } else {
1441
194
            std::string host;
1442
194
            if (!ParseHost(buffer, &host, special)) {
1443
52
              url->flags |= URL_FLAGS_FAILED;
1444
52
              return;
1445
            }
1446
142
            if (host == "localhost")
1447
37
              host.clear();
1448
142
            url->flags |= URL_FLAGS_HAS_HOST;
1449
142
            url->host = host;
1450
142
            if (has_state_override)
1451
4
              return;
1452
138
            buffer.clear();
1453
138
            state = kPathStart;
1454
          }
1455
112316
          continue;
1456
        } else {
1457
1108
          buffer += ch;
1458
        }
1459
1108
        break;
1460
188714
      case kPathStart:
1461
188714
        if (IsSpecial(url->scheme)) {
1462
188166
          state = kPath;
1463

188166
          if (ch != '/' && ch != '\\') {
1464
73754
            continue;
1465
          }
1466

548
        } else if (!has_state_override && ch == '?') {
1467
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1468
6
          url->query.clear();
1469
6
          state = kQuery;
1470

542
        } else if (!has_state_override && ch == '#') {
1471
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1472
6
          url->fragment.clear();
1473
6
          state = kFragment;
1474
536
        } else if (ch != kEOL) {
1475
459
          state = kPath;
1476
459
          if (ch != '/') {
1477
35
            continue;
1478
          }
1479

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1480
2
          url->flags |= URL_FLAGS_HAS_PATH;
1481
2
          url->path.emplace_back("");
1482
        }
1483
114925
        break;
1484
11935472
      case kPath:
1485

11935472
        if (ch == kEOL ||
1486
10783434
            ch == '/' ||
1487
10783364
            special_back_slash ||
1488

10783364
            (!has_state_override && (ch == '?' || ch == '#'))) {
1489
1152629
          if (IsDoubleDotSegment(buffer)) {
1490
3351
            ShortenUrlPath(url);
1491

3351
            if (ch != '/' && !special_back_slash) {
1492
269
              url->flags |= URL_FLAGS_HAS_PATH;
1493
269
              url->path.emplace_back("");
1494
            }
1495
1151188
          } else if (IsSingleDotSegment(buffer) &&
1496

1151188
                     ch != '/' && !special_back_slash) {
1497
386
            url->flags |= URL_FLAGS_HAS_PATH;
1498
386
            url->path.emplace_back("");
1499
1148892
          } else if (!IsSingleDotSegment(buffer)) {
1500
2289753
            if (url->scheme == "file:" &&
1501
1294352
                url->path.empty() &&
1502

2441720
                buffer.size() == 2 &&
1503
100
                IsWindowsDriveLetter(buffer)) {
1504
98
              buffer[1] = ':';
1505
            }
1506
1147368
            url->flags |= URL_FLAGS_HAS_PATH;
1507
1147368
            url->path.emplace_back(std::move(buffer));
1508
          }
1509
1152629
          buffer.clear();
1510
2305258
          if (ch == '?') {
1511
480
            url->flags |= URL_FLAGS_HAS_QUERY;
1512
480
            url->query.clear();
1513
480
            state = kQuery;
1514
1152149
          } else if (ch == '#') {
1515
41
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1516
41
            url->fragment.clear();
1517
41
            state = kFragment;
1518
          }
1519
        } else {
1520
10782843
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1521
        }
1522
11935472
        break;
1523
38479
      case kCannotBeBase:
1524
38479
        switch (ch) {
1525
4
          case '?':
1526
4
            state = kQuery;
1527
4
            break;
1528
10
          case '#':
1529
10
            state = kFragment;
1530
10
            break;
1531
38465
          default:
1532
38465
            if (url->path.empty())
1533
              url->path.emplace_back("");
1534
38465
            else if (ch != kEOL)
1535
34463
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1536
        }
1537
38479
        break;
1538
5918
      case kQuery:
1539

5918
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1540
685
          url->flags |= URL_FLAGS_HAS_QUERY;
1541
685
          url->query = std::move(buffer);
1542
685
          buffer.clear();
1543
1060
          if (ch == '#')
1544
375
            state = kFragment;
1545
        } else {
1546
5233
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1547
                                                QUERY_ENCODE_SET_NONSPECIAL);
1548
        }
1549
5918
        break;
1550
4097
      case kFragment:
1551
4097
        switch (ch) {
1552
570
          case kEOL:
1553
570
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1554
570
            url->fragment = std::move(buffer);
1555
570
            break;
1556
3527
          default:
1557
3527
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1558
        }
1559
4097
        break;
1560
      default:
1561
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1562
        return;
1563
    }
1564
1565
13116071
    p++;
1566
  }
1567
}  // NOLINT(readability/fn_size)
1568
1569
// https://url.spec.whatwg.org/#url-serializing
1570
33797
std::string URL::SerializeURL(const struct url_data* url,
1571
                              bool exclude = false) {
1572
33797
  std::string output = url->scheme;
1573
33797
  if (url->flags & URL_FLAGS_HAS_HOST) {
1574
33797
    output += "//";
1575
33797
    if (url->flags & URL_FLAGS_HAS_USERNAME ||
1576
33797
        url->flags & URL_FLAGS_HAS_PASSWORD) {
1577
      if (url->flags & URL_FLAGS_HAS_USERNAME) {
1578
        output += url->username;
1579
      }
1580
      if (url->flags & URL_FLAGS_HAS_PASSWORD) {
1581
        output += ":" + url->password;
1582
      }
1583
      output += "@";
1584
    }
1585
33797
    output += url->host;
1586
33797
    if (url->port != -1) {
1587
      output += ":" + std::to_string(url->port);
1588
    }
1589
  }
1590
33797
  if (url->flags & URL_FLAGS_CANNOT_BE_BASE) {
1591
    output += url->path[0];
1592
  } else {
1593
    if (!(url->flags & URL_FLAGS_HAS_HOST) &&
1594

33797
          url->path.size() > 1 &&
1595
          url->path[0].empty()) {
1596
      output += "/.";
1597
    }
1598
359849
    for (size_t i = 1; i < url->path.size(); i++) {
1599
326052
      output += "/" + url->path[i];
1600
    }
1601
  }
1602
33797
  if (url->flags & URL_FLAGS_HAS_QUERY) {
1603
    output = "?" + url->query;
1604
  }
1605

33797
  if (!exclude && url->flags & URL_FLAGS_HAS_FRAGMENT) {
1606
    output = "#" + url->fragment;
1607
  }
1608
33797
  return output;
1609
}
1610
1611
namespace {
1612
130591
void SetArgs(Environment* env,
1613
             Local<Value> argv[ARG_COUNT],
1614
             const struct url_data& url) {
1615
130591
  Isolate* isolate = env->isolate();
1616
130591
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1617
261182
  argv[ARG_PROTOCOL] =
1618
130591
      url.flags & URL_FLAGS_SPECIAL ?
1619
125618
          GetSpecial(env, url.scheme) :
1620
4973
          OneByteString(isolate, url.scheme.c_str());
1621
130591
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1622
1220
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1623
130591
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1624
1180
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1625
130591
  if (url.flags & URL_FLAGS_HAS_HOST)
1626
252628
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1627
130591
  if (url.flags & URL_FLAGS_HAS_QUERY)
1628
1386
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1629
130591
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1630
1132
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1631
130591
  if (url.port > -1)
1632
2186
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1633
130591
  if (url.flags & URL_FLAGS_HAS_PATH)
1634
260038
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1635
130591
}
1636
1637
138782
void Parse(Environment* env,
1638
           Local<Value> recv,
1639
           const char* input,
1640
           size_t len,
1641
           enum url_parse_state state_override,
1642
           Local<Value> base_obj,
1643
           Local<Value> context_obj,
1644
           Local<Function> cb,
1645
           Local<Value> error_cb) {
1646
138782
  Isolate* isolate = env->isolate();
1647
138782
  Local<Context> context = env->context();
1648
138782
  HandleScope handle_scope(isolate);
1649
138782
  Context::Scope context_scope(context);
1650
1651
138782
  const bool has_context = context_obj->IsObject();
1652
138782
  const bool has_base = base_obj->IsObject();
1653
1654
138782
  url_data base;
1655
138782
  url_data url;
1656
138782
  if (has_context)
1657
39987
    url = HarvestContext(env, context_obj.As<Object>());
1658
138782
  if (has_base)
1659
6449
    base = HarvestBase(env, base_obj.As<Object>());
1660
1661
138782
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1662

138782
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1663
39987
      ((state_override != kUnknownState) &&
1664
39987
       (url.flags & URL_FLAGS_TERMINATED)))
1665
44
    return;
1666
1667
  // Define the return value placeholders
1668
138738
  const Local<Value> undef = Undefined(isolate);
1669
138738
  const Local<Value> null = Null(isolate);
1670
138738
  if (!(url.flags & URL_FLAGS_FAILED)) {
1671
    Local<Value> argv[] = {
1672
      undef,
1673
      undef,
1674
      undef,
1675
      undef,
1676
      null,  // host defaults to null
1677
      null,  // port defaults to null
1678
      undef,
1679
      null,  // query defaults to null
1680
      null,  // fragment defaults to null
1681
130591
    };
1682
130591
    SetArgs(env, argv, url);
1683
261182
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
1684
8147
  } else if (error_cb->IsFunction()) {
1685
8017
    Local<Value> argv[2] = { undef, undef };
1686
8017
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1687
8017
    argv[ERR_ARG_INPUT] =
1688
16034
      String::NewFromUtf8(env->isolate(), input).ToLocalChecked();
1689
8017
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
1690
8017
        .FromMaybe(Local<Value>());
1691
  }
1692
}
1693
1694
138782
void Parse(const FunctionCallbackInfo<Value>& args) {
1695
138782
  Environment* env = Environment::GetCurrent(args);
1696
138782
  CHECK_GE(args.Length(), 5);
1697
277564
  CHECK(args[0]->IsString());  // input
1698


376885
  CHECK(args[2]->IsUndefined() ||  // base context
1699
        args[2]->IsNull() ||
1700
        args[2]->IsObject());
1701


397525
  CHECK(args[3]->IsUndefined() ||  // context
1702
        args[3]->IsNull() ||
1703
        args[3]->IsObject());
1704
138782
  CHECK(args[4]->IsFunction());  // complete callback
1705

376359
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1706
1707
138782
  Utf8Value input(env->isolate(), args[0]);
1708
138782
  enum url_parse_state state_override = kUnknownState;
1709
138782
  if (args[1]->IsNumber()) {
1710
138782
    state_override = static_cast<enum url_parse_state>(
1711
277564
        args[1]->Uint32Value(env->context()).FromJust());
1712
  }
1713
1714
277564
  Parse(env, args.This(),
1715
138782
        *input, input.length(),
1716
        state_override,
1717
        args[2],
1718
        args[3],
1719
277564
        args[4].As<Function>(),
1720
        args[5]);
1721
138782
}
1722
1723
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1724
92
  Environment* env = Environment::GetCurrent(args);
1725
92
  CHECK_GE(args.Length(), 1);
1726
184
  CHECK(args[0]->IsString());
1727
184
  Utf8Value value(env->isolate(), args[0]);
1728
92
  std::string output;
1729
92
  size_t len = value.length();
1730
92
  output.reserve(len);
1731
756
  for (size_t n = 0; n < len; n++) {
1732
664
    const char ch = (*value)[n];
1733
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1734
  }
1735
276
  args.GetReturnValue().Set(
1736
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1737
92
}
1738
1739
17
void ToUSVString(const FunctionCallbackInfo<Value>& args) {
1740
17
  Environment* env = Environment::GetCurrent(args);
1741
17
  CHECK_GE(args.Length(), 2);
1742
34
  CHECK(args[0]->IsString());
1743
17
  CHECK(args[1]->IsNumber());
1744
1745
17
  TwoByteValue value(env->isolate(), args[0]);
1746
1747
17
  int64_t start = args[1]->IntegerValue(env->context()).FromJust();
1748
17
  CHECK_GE(start, 0);
1749
1750
49
  for (size_t i = start; i < value.length(); i++) {
1751
32
    char16_t c = value[i];
1752
32
    if (!IsUnicodeSurrogate(c)) {
1753
13
      continue;
1754

19
    } else if (IsUnicodeSurrogateTrail(c) || i == value.length() - 1) {
1755
16
      value[i] = kUnicodeReplacementCharacter;
1756
    } else {
1757
3
      char16_t d = value[i + 1];
1758
3
      if (IsUnicodeTrail(d)) {
1759
        i++;
1760
      } else {
1761
3
        value[i] = kUnicodeReplacementCharacter;
1762
      }
1763
    }
1764
  }
1765
1766
51
  args.GetReturnValue().Set(
1767
17
      String::NewFromTwoByte(env->isolate(),
1768
17
                             *value,
1769
                             NewStringType::kNormal,
1770
17
                             value.length()).ToLocalChecked());
1771
17
}
1772
1773
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1774
229
  Environment* env = Environment::GetCurrent(args);
1775
229
  CHECK_GE(args.Length(), 1);
1776
458
  CHECK(args[0]->IsString());
1777
229
  Utf8Value value(env->isolate(), args[0]);
1778
1779
229
  URLHost host;
1780
  // Assuming the host is used for a special scheme.
1781
229
  host.ParseHost(*value, value.length(), true);
1782
229
  if (host.ParsingFailed()) {
1783
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1784
12
    return;
1785
  }
1786
217
  std::string out = host.ToStringMove();
1787
651
  args.GetReturnValue().Set(
1788
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1789
}
1790
1791
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1792
207
  Environment* env = Environment::GetCurrent(args);
1793
207
  CHECK_GE(args.Length(), 1);
1794
414
  CHECK(args[0]->IsString());
1795
207
  Utf8Value value(env->isolate(), args[0]);
1796
1797
207
  URLHost host;
1798
  // Assuming the host is used for a special scheme.
1799
207
  host.ParseHost(*value, value.length(), true, true);
1800
207
  if (host.ParsingFailed()) {
1801
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1802
12
    return;
1803
  }
1804
195
  std::string out = host.ToStringMove();
1805
585
  args.GetReturnValue().Set(
1806
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1807
}
1808
1809
616
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1810
616
  Environment* env = Environment::GetCurrent(args);
1811
616
  CHECK_EQ(args.Length(), 1);
1812
616
  CHECK(args[0]->IsFunction());
1813
1232
  env->set_url_constructor_function(args[0].As<Function>());
1814
616
}
1815
1816
616
void Initialize(Local<Object> target,
1817
                Local<Value> unused,
1818
                Local<Context> context,
1819
                void* priv) {
1820
616
  Environment* env = Environment::GetCurrent(context);
1821
616
  env->SetMethod(target, "parse", Parse);
1822
616
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1823
616
  env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString);
1824
616
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1825
616
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1826
616
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1827
1828
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1829
16632
  FLAGS(XX)
1830
#undef XX
1831
1832
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1833
25872
  PARSESTATES(XX)
1834
#undef XX
1835
616
}
1836
}  // namespace
1837
1838
4852
// Registers every native callback that Initialize() exposes with
// |registry|, in the same order as before.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  const v8::FunctionCallback callbacks[] = {
    Parse,
    EncodeAuthSet,
    ToUSVString,
    DomainToASCII,
    DomainToUnicode,
    SetURLConstructor,
  };
  for (const v8::FunctionCallback callback : callbacks) {
    registry->Register(callback);
  }
}
1846
1847
8
std::string URL::ToFilePath() const {
1848
8
  if (context_.scheme != "file:") {
1849
1
    return "";
1850
  }
1851
1852
#ifdef _WIN32
1853
  const char* slash = "\\";
1854
  auto is_slash = [] (char ch) {
1855
    return ch == '/' || ch == '\\';
1856
  };
1857
#else
1858
7
  const char* slash = "/";
1859
46
  auto is_slash = [] (char ch) {
1860
46
    return ch == '/';
1861
  };
1862

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1863
7
      context_.host.length() > 0) {
1864
1
    return "";
1865
  }
1866
#endif
1867
12
  std::string decoded_path;
1868
18
  for (const std::string& part : context_.path) {
1869
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1870
58
    for (char& ch : decoded) {
1871
46
      if (is_slash(ch)) {
1872
1
        return "";
1873
      }
1874
    }
1875
12
    decoded_path += slash + decoded;
1876
  }
1877
1878
#ifdef _WIN32
1879
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1880
1881
  // If hostname is set, then we have a UNC path. Pass the hostname through
1882
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1883
  // need to worry about percent encoding because the URL parser will have
1884
  // already taken care of that for us. Note that this only causes IDNs with an
1885
  // appropriate `xn--` prefix to be decoded.
1886
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1887
      context_.host.length() > 0) {
1888
    std::string unicode_host;
1889
    if (!ToUnicode(context_.host, &unicode_host)) {
1890
      return "";
1891
    }
1892
    return "\\\\" + unicode_host + decoded_path;
1893
  }
1894
  // Otherwise, it's a local path that requires a drive letter.
1895
  if (decoded_path.length() < 3) {
1896
    return "";
1897
  }
1898
  if (decoded_path[2] != ':' ||
1899
      !IsASCIIAlpha(decoded_path[1])) {
1900
    return "";
1901
  }
1902
  // Strip out the leading '\'.
1903
  return decoded_path.substr(1);
1904
#else
1905
5
  return decoded_path;
1906
#endif
1907
}
1908
1909
33797
// Builds a URL from |file_path|: starts from "file://" and parses the path
// on top of it beginning in the kPathStart state. Every '%' in |file_path|
// is first rewritten to "%25".
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped_file_path;
  for (const char ch : file_path) {
    escaped_file_path += ch;
    if (ch == '%') {
      // '%' followed by "25" is the percent-encoding of '%' itself.
      escaped_file_path += "25";
    }
  }

  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1921
1922
// This function works by calling out to a JS function that creates and
1923
// returns the JS URL object. Be mindful of the JS<->Native boundary
1924
// crossing that is required.
1925
// Creates the JS object for this URL by calling the URL constructor
// function stored on |env| with this URL's components as arguments.
//
// Returns an empty handle when this URL carries URL_FLAGS_FAILED;
// otherwise returns whatever the constructor call produced.
MaybeLocal<Value> URL::ToObject(Environment* env) const {
  Isolate* isolate = env->isolate();
  Local<Context> context = env->context();
  Context::Scope context_scope(context);

  if (context_.flags & URL_FLAGS_FAILED) {
    return Local<Value>();
  }

  const Local<Value> undef = Undefined(isolate);
  const Local<Value> null = Null(isolate);

  // Defaults for every argument slot; SetArgs() overwrites the slots for
  // the components this URL actually has.
  Local<Value> argv[] = {
    undef,
    undef,
    undef,
    undef,
    null,  // host defaults to null
    null,  // port defaults to null
    undef,
    null,  // query defaults to null
    null,  // fragment defaults to null
  };
  SetArgs(env, argv, context_);

  TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);

  // The SetURLConstructor method must have been called already to
  // set the constructor function used below. SetURLConstructor is
  // called automatically when the internal/url.js module is loaded
  // during the internal/bootstrap/node.js processing.
  return env->url_constructor_function()
      ->Call(env->context(), undef, arraysize(argv), argv);
}
1963
1964
}  // namespace url
1965
}  // namespace node
1966
1967
4925
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1968
4852
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)