GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1120 1178 95.1 %
Date: 2022-02-11 04:14:20 Branches: 973 1100 88.5 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <string>
11
#include <vector>
12
13
namespace node {
14
15
using errors::TryCatchScope;
16
17
using url::table_data::hex;
18
using url::table_data::C0_CONTROL_ENCODE_SET;
19
using url::table_data::FRAGMENT_ENCODE_SET;
20
using url::table_data::PATH_ENCODE_SET;
21
using url::table_data::USERINFO_ENCODE_SET;
22
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
23
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
24
25
using v8::Array;
26
using v8::Context;
27
using v8::Function;
28
using v8::FunctionCallbackInfo;
29
using v8::HandleScope;
30
using v8::Int32;
31
using v8::Integer;
32
using v8::Isolate;
33
using v8::Local;
34
using v8::MaybeLocal;
35
using v8::NewStringType;
36
using v8::Null;
37
using v8::Object;
38
using v8::String;
39
using v8::Undefined;
40
using v8::Value;
41
42
159800
// Creates a V8 string from the UTF-8 bytes held in `str`.
// The result is unwrapped with ToLocalChecked(), so an empty MaybeLocal is
// not handled by this function.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
48
49
namespace url {
50
namespace {
51
52
// https://url.spec.whatwg.org/#eof-code-point
53
// Sentinel character value used by the parsers in this file to stand in for
// "end of input" once the read pointer has reached the end of the buffer.
constexpr char kEOL = -1;
54
55
// https://url.spec.whatwg.org/#concept-host
56
// Result of parsing a URL host. After one of the Parse*() methods has run the
// object holds a domain, an opaque host, an IPv4 address, an IPv6 address, or
// stays in the "failed" state (which is also the initial state).
class URLHost {
 public:
  ~URLHost();

  // Each parser expects the object to still be in the failed/initial state
  // (they CHECK this) and leaves it there when `input` cannot be parsed.
  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no host has been stored.
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }

  // Serializes the stored host.
  std::string ToString() const;
  // Same result as ToString(), but moves the stored string out where possible
  // and always resets `*this` to the failed state afterwards.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage shared by all host kinds. `type_` records which member is active;
  // the string member is constructed and destroyed manually (see Reset() and
  // SetString()), which is why the union's own special members do nothing
  // beyond initializing `ipv4`.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    Value() : ipv4(0) {}
    ~Value() {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the active one and returns to the
  // failed state.
  void Reset() {
    using string = std::string;
    const bool holds_string =
        type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
    if (holds_string)
      value_.domain_or_opaque.~string();
    type_ = HostType::H_FAILED;
  }

  // Plain assignment to the union's string member would be brittle: it is
  // only valid when a live string already occupies the storage. Instead the
  // previous content is torn down with Reset() and a fresh string is
  // placement-constructed. Not keeping strings inside a union at all might be
  // the cleaner long-term answer.
  void SetString(HostType type, std::string&& string) {
    Reset();
    type_ = type;
    new(&value_.domain_or_opaque) std::string(std::move(string));
  }

  void SetOpaque(std::string&& string) {
    SetString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    SetString(HostType::H_DOMAIN, std::move(string));
  }
};

// Releases the string member if one is currently stored.
URLHost::~URLHost() {
  Reset();
}
131
132
// X-macro list that generates the enumerators of `url_cb_args` below.
// NOTE(review): presumably these are the positional indices of the values
// handed to the JS parse callback - confirm against the caller.
// ARG_COUNT is not an argument itself: because it is listed last, its value
// equals the number of preceding entries.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)                                                            \
  XX(ARG_COUNT)  // This one has to be last.

// X-macro list that generates the enumerators of `url_error_cb_args` below.
// The final entry deliberately has no trailing line continuation, so the
// macro body does not depend on the following source line being blank.
#define ERR_ARGS(XX)                                                          \
  XX(ERR_ARG_FLAGS)                                                           \
  XX(ERR_ARG_INPUT)

enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};

enum url_error_cb_args {
#define XX(name) name,
  ERR_ARGS(XX)
#undef XX
};
159
160
// Generates two overloads called `name`:
//   - name(ch1, ch2): evaluates `expr`, which may refer to `ch1` and `ch2`;
//   - name(str): applies the first overload to the first two characters of
//     `str`, and is false for strings shorter than two characters.
// `bits` is the character width the test is written for; narrower character
// types are rejected at compile time.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  bool name(const T ch1, const T ch2) {                                       \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  bool name(const std::basic_string<T>& str) {                                \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be wider than " #bits " bits");             \
    if (str.length() < 2)                                                     \
      return false;                                                           \
    return name(str[0], str[1]);                                              \
  }
173
174
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
175

16059552
// True for U+0009 TAB, U+000A LF and U+000D CR.
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\n' || ch == '\r' || ch == '\t'))
176
177
// https://infra.spec.whatwg.org/#c0-control-or-space
178

326248
// True for every code point from U+0000 up to and including U+0020 SPACE.
CHAR_TEST(8, IsC0ControlOrSpace, ('\0' <= ch && ch <= ' '))
179
180
// https://infra.spec.whatwg.org/#ascii-digit
181

631312
// True for '0' through '9'.
CHAR_TEST(8, IsASCIIDigit, ('0' <= ch && ch <= '9'))
182
183
// https://infra.spec.whatwg.org/#ascii-hex-digit
184


1078
// True for ASCII digits and for the letters a-f in either case.
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               ('a' <= ch && ch <= 'f') ||
                               ('A' <= ch && ch <= 'F')))
187
188
// https://infra.spec.whatwg.org/#ascii-alpha
189


1404863
// True for the letters a-z in either case.
CHAR_TEST(8, IsASCIIAlpha, (('a' <= ch && ch <= 'z') ||
                            ('A' <= ch && ch <= 'Z')))
191
192
// https://infra.spec.whatwg.org/#ascii-alphanumeric
193

617344
// True for ASCII letters and ASCII digits.
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIAlpha(ch) || IsASCIIDigit(ch)))
194
195
// https://infra.spec.whatwg.org/#ascii-lowercase
196
// Maps ASCII letters to their lowercase form by setting bit 0x20; every other
// value is returned unchanged.
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}
200
201
// https://url.spec.whatwg.org/#forbidden-host-code-point
202









89132
// True for the characters that may not appear in a host. The list is kept in
// ascending code point order.
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '<' || ch == '>' || ch == '?' ||
          ch == '@' || ch == '[' || ch == '\\' || ch == ']' ||
          ch == '^' || ch == '|')
208
209
// https://url.spec.whatwg.org/#windows-drive-letter
210

12946
// An ASCII letter followed by ':' or '|'.
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     ((ch2 == ':' || ch2 == '|') && IsASCIIAlpha(ch1)))
212
213
// https://url.spec.whatwg.org/#normalized-windows-drive-letter
214

2624
// An ASCII letter followed by ':' (the '|' spelling is not accepted here).
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (ch2 == ':' && IsASCIIAlpha(ch1)))
216
217
#undef TWO_CHAR_STRING_TEST
218
219
13564219
// Reports whether bit `i` is set in the bitmap `a`. Bits are numbered from
// the least significant bit of a[0], eight bits per array element.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i / 8];
  const int mask = 1 << (i % 8);
  return (byte & mask) != 0;
}
222
223
// Appends ch to str. If ch position in encode_set is set, the ch will
224
// be percent-encoded then appended.
225
13564219
void AppendOrEscape(std::string* str,
226
                    const unsigned char ch,
227
                    const uint8_t encode_set[]) {
228
13564219
  if (BitAt(encode_set, ch))
229
1923
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
230
  else
231
13562296
    *str += ch;
232
13564219
}
233
234
// Converts one hexadecimal digit (either case) to its numeric value.
// Any other character yields static_cast<unsigned>(-1).
template <typename T>
unsigned hex2bin(const T ch) {
  if ('0' <= ch && ch <= '9')
    return ch - '0';
  if ('A' <= ch && ch <= 'F')
    return ch - 'A' + 10;
  if ('a' <= ch && ch <= 'f')
    return ch - 'a' + 10;
  return static_cast<unsigned>(-1);
}
244
245
4211
std::string PercentDecode(const char* input, size_t len) {
246
4211
  std::string dest;
247
4211
  if (len == 0)
248
2
    return dest;
249
4209
  dest.reserve(len);
250
4209
  const char* pointer = input;
251
4209
  const char* end = input + len;
252
253
91891
  while (pointer < end) {
254
87682
    const char ch = pointer[0];
255
87682
    size_t remaining = end - pointer - 1;
256


88119
    if (ch != '%' || remaining < 2 ||
257
437
        (ch == '%' &&
258
437
         (!IsASCIIHexDigit(pointer[1]) ||
259
433
          !IsASCIIHexDigit(pointer[2])))) {
260
87257
      dest += ch;
261
87257
      pointer++;
262
87257
      continue;
263
    } else {
264
425
      unsigned a = hex2bin(pointer[1]);
265
425
      unsigned b = hex2bin(pointer[2]);
266
425
      char c = static_cast<char>(a * 16 + b);
267
425
      dest += c;
268
425
      pointer += 3;
269
    }
270
  }
271
4209
  return dest;
272
}
273
274
// Table of the special schemes. Each entry is
//   XX(key, default port, scheme string including the trailing ':')
// where a port of -1 means the scheme has no default port.
#define SPECIALS(XX)                                                          \
  XX(ftp, 21, "ftp:")                                                         \
  XX(file, -1, "file:")                                                       \
  XX(http, 80, "http:")                                                       \
  XX(https, 443, "https:")                                                    \
  XX(ws, 80, "ws:")                                                           \
  XX(wss, 443, "wss:")

// Returns true when `scheme` (spelled with its trailing ':') is one of the
// entries of SPECIALS.
bool IsSpecial(const std::string& scheme) {
  static constexpr const char* kSpecialSchemes[] = {
#define V(unused_key, unused_port, name) name,
    SPECIALS(V)
#undef V
  };
  for (const char* special : kSpecialSchemes) {
    if (scheme == special)
      return true;
  }
  return false;
}
288
289
156273
// Returns the per-environment string for the special scheme `scheme`.
// Callers must only pass schemes listed in SPECIALS; anything else reaches
// UNREACHABLE().
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define RETURN_IF_MATCH(key, unused_port, name)                               \
  if (scheme == name) {                                                       \
    return env->url_special_##key##_string();                                 \
  }
  SPECIALS(RETURN_IF_MATCH)
#undef RETURN_IF_MATCH
  UNREACHABLE();
}
296
297
149008
int NormalizePort(const std::string& scheme, int p) {
298
#define V(_, port, name) if (scheme == name && p == port) return -1;
299









149008
  SPECIALS(V);
300
#undef V
301
10605
  return p;
302
}
303
304
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
305
7255
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
306
7255
  size_t length = end - p;
307
6229
  return length >= 2 &&
308

13520
    IsWindowsDriveLetter(p[0], p[1]) &&
309
36
    (length == 2 ||
310
36
      p[2] == '/' ||
311
14
      p[2] == '\\' ||
312
6
      p[2] == '?' ||
313
7259
      p[2] == '#');
314
}
315
316
// ToUnicode() / ToASCII() convert `input` and store the result in `*output`,
// returning false on failure. Callers in this file pass the same string as
// both `input` and `output`, so both variants must tolerate that aliasing.
#if defined(NODE_HAVE_I18N_SUPPORT)
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> buf;
  const auto status = i18n::ToUnicode(&buf, input.c_str(), input.length());
  if (status < 0)
    return false;
  output->assign(*buf, buf.length());
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> buf;
  // Both a conversion error and an empty conversion result count as failure.
  const bool failed =
      i18n::ToASCII(&buf, input.c_str(), input.length()) < 0 ||
      buf.length() == 0;
  if (failed)
    return false;
  output->assign(*buf, buf.length());
  return true;
}
#else  // !defined(NODE_HAVE_I18N_SUPPORT)
// Without ICU the conversions are intentionally pass-throughs.
bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
#endif  // !defined(NODE_HAVE_I18N_SUPPORT)
346
347
#define NS_IN6ADDRSZ 16
348
349
275
void URLHost::ParseIPv6Host(const char* input, size_t length) {
350
275
  CHECK_EQ(type_, HostType::H_FAILED);
351
352
  unsigned char buf[sizeof(struct in6_addr)];
353
275
  MaybeStackBuffer<char> ipv6(length + 1);
354
275
  *(*ipv6 + length) = 0;
355
275
  memset(buf, 0, sizeof(buf));
356
275
  memcpy(*ipv6, input, sizeof(const char) * length);
357
358
275
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
359
360
275
  if (ret != 0) {
361
92
    return;
362
  }
363
364
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
365
1647
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
366
1464
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
367
  }
368
369
183
  type_ = HostType::H_IPV6;
370
}
371
372
4502
// Parses the IPv4 number spelled by the characters in [start, end).
//
// A "0x"/"0X" prefix selects hexadecimal, any other leading '0' followed by
// more characters selects octal, everything else is decimal. An empty range,
// or one consisting only of the hex prefix, is 0.
//
// Returns the parsed value, -1 when a character is not a digit of the
// selected radix, or INT64_MAX when the value does not fit into int64_t
// (callers only need to know that such a value is too large).
//
// Only characters inside [start, end) are ever read: the conversion is done
// here rather than with strtoll(), which ignores `end` and would keep
// consuming digits located after it.
int64_t ParseNumber(const char* start, const char* end) {
  unsigned R = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    R = 16;
  }
  if (start == end)
    return 0;
  if (R == 10 && end - start > 1 && start[0] == '0') {
    start++;
    R = 8;
  }

  int64_t value = 0;
  for (const char* p = start; p < end; p++) {
    const char ch = *p;
    unsigned digit;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
      digit = 10 + (ch - 'a');
    else if (ch >= 'A' && ch <= 'F')
      digit = 10 + (ch - 'A');
    else
      return -1;
    if (digit >= R)
      return -1;

    // Saturate instead of overflowing, but keep scanning so that an invalid
    // character later in the input is still reported as -1.
    const int64_t headroom =
        (INT64_MAX - static_cast<int64_t>(digit)) / static_cast<int64_t>(R);
    if (value > headroom)
      value = INT64_MAX;
    else
      value = value * static_cast<int64_t>(R) + static_cast<int64_t>(digit);
  }
  return value;
}
406
407
3875
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
408
3875
  CHECK_EQ(type_, HostType::H_FAILED);
409
3875
  *is_ipv4 = false;
410
3875
  const char* pointer = input;
411
3875
  const char* mark = input;
412
3875
  const char* end = pointer + length;
413
3875
  int parts = 0;
414
3875
  uint32_t val = 0;
415
  uint64_t numbers[4];
416
3875
  int tooBigNumbers = 0;
417
3875
  if (length == 0)
418
3652
    return;
419
420
35957
  while (pointer <= end) {
421
35710
    const char ch = pointer < end ? pointer[0] : kEOL;
422
35710
    int64_t remaining = end - pointer - 1;
423

35710
    if (ch == '.' || ch == kEOL) {
424
4518
      if (++parts > static_cast<int>(arraysize(numbers)))
425
4
        return;
426
4514
      if (pointer == mark)
427
12
        return;
428
4502
      int64_t n = ParseNumber(mark, pointer);
429
4502
      if (n < 0)
430
3608
        return;
431
432
894
      if (n > 255) {
433
112
        tooBigNumbers++;
434
      }
435
894
      numbers[parts - 1] = n;
436
894
      mark = pointer + 1;
437

894
      if (ch == '.' && remaining == 0)
438
4
        break;
439
    }
440
32082
    pointer++;
441
  }
442
251
  CHECK_GT(parts, 0);
443
251
  *is_ipv4 = true;
444
445
  // If any but the last item in numbers is greater than 255, return failure.
446
  // If the last item in numbers is greater than or equal to
447
  // 256^(5 - the number of items in numbers), return failure.
448
247
  if (tooBigNumbers > 1 ||
449

558
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
450
243
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
451
28
    return;
452
  }
453
454
223
  type_ = HostType::H_IPV4;
455
223
  val = static_cast<uint32_t>(numbers[parts - 1]);
456
768
  for (int n = 0; n < parts - 1; n++) {
457
545
    double b = 3 - n;
458
545
    val +=
459
545
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
460
  }
461
462
223
  value_.ipv4 = val;
463
}
464
465
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
466
520
  CHECK_EQ(type_, HostType::H_FAILED);
467
520
  std::string output;
468
520
  output.reserve(length);
469
3053
  for (size_t i = 0; i < length; i++) {
470
2595
    const char ch = input[i];
471

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
472
62
      return;
473
    } else {
474
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
475
    }
476
  }
477
478
458
  SetOpaque(std::move(output));
479
}
480
481
5001
void URLHost::ParseHost(const char* input,
482
                        size_t length,
483
                        bool is_special,
484
                        bool unicode) {
485
5001
  CHECK_EQ(type_, HostType::H_FAILED);
486
5001
  const char* pointer = input;
487
488
5001
  if (length == 0)
489
1377
    return;
490
491
5001
  if (pointer[0] == '[') {
492
283
    if (pointer[length - 1] != ']')
493
8
      return;
494
275
    return ParseIPv6Host(++pointer, length - 2);
495
  }
496
497
4718
  if (!is_special)
498
520
    return ParseOpaqueHost(input, length);
499
500
  // First, we have to percent decode
501
4198
  std::string decoded = PercentDecode(input, length);
502
503
  // Then we have to punycode toASCII
504
4198
  if (!ToASCII(decoded, &decoded))
505
148
    return;
506
507
  // If any of the following characters are still present, we have to fail
508
90438
  for (size_t n = 0; n < decoded.size(); n++) {
509
86563
    const char ch = decoded[n];
510
86563
    if (IsForbiddenHostCodePoint(ch)) {
511
175
      return;
512
    }
513
  }
514
515
  // Check to see if it's an IPv4 IP address
516
  bool is_ipv4;
517
3875
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
518
3875
  if (is_ipv4)
519
251
    return;
520
521
  // If the unicode flag is set, run the result through punycode ToUnicode
522

3624
  if (unicode && !ToUnicode(decoded, &decoded))
523
    return;
524
525
  // It's not an IPv4 or IPv6 address, it must be a domain
526
3624
  SetDomain(std::move(decoded));
527
}
528
529
// Locates the longest sequence of 0 segments in an IPv6 address
530
// in order to use the :: compression when serializing
531
// Returns a pointer to the first element of the longest run of zeros in
// values[0..len), or nullptr when no run is at least two elements long.
// Among runs of equal length the earliest one wins. Used to pick the part of
// an IPv6 address that is serialized with the "::" compression.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best = nullptr;
  unsigned best_length = 1;  // A run must beat this, i.e. have >= 2 zeros.

  T* run_start = nullptr;
  unsigned run_length = 0;

  for (size_t i = 0; i < len; i++) {
    if (values[i] == 0) {
      if (run_start == nullptr)
        run_start = values + i;
      run_length++;
      continue;
    }
    // A non-zero element closes the current run, if there is one.
    if (run_length > best_length) {
      best_length = run_length;
      best = run_start;
    }
    run_length = 0;
    run_start = nullptr;
  }

  // The final run may extend to the end of the array.
  if (run_length > best_length)
    best = run_start;
  return best;
}
559
560
4488
std::string URLHost::ToStringMove() {
561
4488
  std::string return_value;
562
4488
  switch (type_) {
563
4082
    case HostType::H_DOMAIN:
564
    case HostType::H_OPAQUE:
565
4082
      return_value = std::move(value_.domain_or_opaque);
566
4082
      break;
567
406
    default:
568
406
      return_value = ToString();
569
406
      break;
570
  }
571
4488
  Reset();
572
4488
  return return_value;
573
}
574
575
406
std::string URLHost::ToString() const {
576
812
  std::string dest;
577

406
  switch (type_) {
578
    case HostType::H_DOMAIN:
579
    case HostType::H_OPAQUE:
580
      return value_.domain_or_opaque;
581
223
    case HostType::H_IPV4: {
582
223
      dest.reserve(15);
583
223
      uint32_t value = value_.ipv4;
584
1115
      for (int n = 0; n < 4; n++) {
585
892
        dest.insert(0, std::to_string(value % 256));
586
892
        if (n < 3)
587
669
          dest.insert(0, 1, '.');
588
892
        value /= 256;
589
      }
590
223
      break;
591
    }
592
183
    case HostType::H_IPV6: {
593
183
      dest.reserve(41);
594
183
      dest += '[';
595
183
      const uint16_t* start = &value_.ipv6[0];
596
      const uint16_t* compress_pointer =
597
183
          FindLongestZeroSequence(start, 8);
598
183
      bool ignore0 = false;
599
1647
      for (int n = 0; n <= 7; n++) {
600
1464
        const uint16_t* piece = &value_.ipv6[n];
601

1464
        if (ignore0 && *piece == 0)
602
1211
          continue;
603
432
        else if (ignore0)
604
173
          ignore0 = false;
605
432
        if (compress_pointer == piece) {
606
179
          dest += n == 0 ? "::" : ":";
607
179
          ignore0 = true;
608
179
          continue;
609
        }
610
        char buf[5];
611
253
        snprintf(buf, sizeof(buf), "%x", *piece);
612
253
        dest += buf;
613
253
        if (n < 7)
614
76
          dest += ':';
615
      }
616
183
      dest += ']';
617
183
      break;
618
    }
619
    case HostType::H_FAILED:
620
      break;
621
  }
622
406
  return dest;
623
}
624
625
4659
bool ParseHost(const std::string& input,
626
               std::string* output,
627
               bool is_special,
628
               bool unicode = false) {
629
4659
  if (input.empty()) {
630
94
    output->clear();
631
94
    return true;
632
  }
633
9130
  URLHost host;
634
4565
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
635
4565
  if (host.ParsingFailed())
636
489
    return false;
637
4076
  *output = host.ToStringMove();
638
4076
  return true;
639
}
640
641
9349
std::vector<std::string> FromJSStringArray(Environment* env,
642
                                           Local<Array> array) {
643
9349
  std::vector<std::string> vec;
644
9349
  if (array->Length() > 0)
645
9333
    vec.reserve(array->Length());
646
148074
  for (size_t n = 0; n < array->Length(); n++) {
647
129376
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
648
129376
    if (val->IsString()) {
649
64688
      Utf8Value value(env->isolate(), val.As<String>());
650
64688
      vec.emplace_back(*value, value.length());
651
    }
652
  }
653
9349
  return vec;
654
}
655
656
9349
// Builds a url_data from the properties of the JS object `base_obj`.
//   - flags / port: copied when the property is an Int32;
//   - scheme: always converted to a UTF-8 string;
//   - username / password: copied when the property is a string, flagged as
//     present only when that string is non-empty;
//   - host / query / fragment: copied and flagged as present whenever the
//     property is a string, even an empty one;
//   - path: copied (and flagged) when the property is an array.
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  // Property read shared by every field below.
  const auto read = [&](Local<String> key) -> Local<Value> {
    return base_obj->Get(env->context(), key).ToLocalChecked();
  };

  Local<Value> flags = read(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = read(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  Local<Value> scheme = read(env->scheme_string());
  base.scheme = Utf8Value(env->isolate(), scheme).out();

  // Copies the string property `name` into `base.*member` and sets `flag`
  // when the string is non-empty or `empty_as_present` is true.
  const auto copy_string = [&](std::string url_data::*member,
                               int flag,
                               Local<String> name,
                               bool empty_as_present) {
    Local<Value> value = read(name);
    if (!value->IsString())
      return;
    Utf8Value utf8value(env->isolate(), value.As<String>());
    (base.*member).assign(*utf8value, utf8value.length());
    if (empty_as_present || value.As<String>()->Length() != 0)
      base.flags |= flag;
  };

  copy_string(&url_data::username,
              URL_FLAGS_HAS_USERNAME,
              env->username_string(),
              false);
  copy_string(&url_data::password,
              URL_FLAGS_HAS_PASSWORD,
              env->password_string(),
              false);
  copy_string(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
  copy_string(&url_data::query,
              URL_FLAGS_HAS_QUERY,
              env->query_string(),
              true);
  copy_string(&url_data::fragment,
              URL_FLAGS_HAS_FRAGMENT,
              env->fragment_string(),
              true);

  Local<Value> path = read(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
710
711
47287
// Builds a url_data from the properties of the JS object `context_obj`.
// Only a fixed subset of the flag bits is taken over (see kCopyFlagsMask).
// Username and password are read only when the corresponding flag is set, in
// which case they must be strings. Scheme and host are copied when they are
// strings, the port when it is an Int32.
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;

  // Property read shared by every field below.
  const auto read = [&](Local<String> key) -> Local<Value> {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  // Stores the UTF-8 form of `value` in `*dest`.
  const auto assign_utf8 = [&](std::string* dest, Local<Value> value) {
    Utf8Value utf8(env->isolate(), value);
    dest->assign(*utf8, utf8.length());
  };

  Local<Value> flags = read(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = read(env->scheme_string());
  if (scheme->IsString())
    assign_utf8(&context.scheme, scheme);

  Local<Value> port = read(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = read(env->username_string());
    CHECK(username->IsString());
    assign_utf8(&context.username, username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = read(env->password_string());
    CHECK(password->IsString());
    assign_utf8(&context.password, password);
  }

  Local<Value> host = read(env->host_string());
  if (host->IsString())
    assign_utf8(&context.host, host);

  return context;
}
759
760
// Single dot segment can be ".", "%2e", or "%2E"
761
2862928
bool IsSingleDotSegment(const std::string& str) {
762
2862928
  switch (str.size()) {
763
7858
    case 1:
764
7858
      return str == ".";
765
180404
    case 3:
766
180404
      return str[0] == '%' &&
767

180450
             str[1] == '2' &&
768
180450
             ASCIILowercase(str[2]) == 'e';
769
2674666
    default:
770
2674666
      return false;
771
  }
772
}
773
774
// Double dot segment can be:
775
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
776
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
777
1436362
bool IsDoubleDotSegment(const std::string& str) {
778

1436362
  switch (str.size()) {
779
4919
    case 2:
780
4919
      return str == "..";
781
380878
    case 4:
782

380878
      if (str[0] != '.' && str[0] != '%')
783
380859
        return false;
784
19
      return ((str[0] == '.' &&
785
13
               str[1] == '%' &&
786

8
               str[2] == '2' &&
787
42
               ASCIILowercase(str[3]) == 'e') ||
788
15
              (str[0] == '%' &&
789

12
               str[1] == '2' &&
790
6
               ASCIILowercase(str[2]) == 'e' &&
791
25
               str[3] == '.'));
792
87005
    case 6:
793
87005
      return (str[0] == '%' &&
794

24
              str[1] == '2' &&
795
12
              ASCIILowercase(str[2]) == 'e' &&
796
4
              str[3] == '%' &&
797

87021
              str[4] == '2' &&
798
87009
              ASCIILowercase(str[5]) == 'e');
799
963560
    default:
800
963560
      return false;
801
  }
802
}
803
804
11648
void ShortenUrlPath(struct url_data* url) {
805
11648
  if (url->path.empty()) return;
806


11836
  if (url->path.size() == 1 && url->scheme == "file:" &&
807
578
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
808
11258
  url->path.pop_back();
809
}
810
811
}  // anonymous namespace
812
813
250098
void URL::Parse(const char* input,
814
                size_t len,
815
                enum url_parse_state state_override,
816
                struct url_data* url,
817
                bool has_url,
818
                const struct url_data* base,
819
                bool has_base) {
820
250098
  const char* p = input;
821
250098
  const char* end = input + len;
822
823
250098
  if (!has_url) {
824
163147
    for (const char* ptr = p; ptr < end; ptr++) {
825
163128
      if (IsC0ControlOrSpace(*ptr))
826
56
        p++;
827
      else
828
163072
        break;
829
    }
830
163139
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
831
163120
      if (IsC0ControlOrSpace(*ptr))
832
48
        end--;
833
      else
834
163072
        break;
835
    }
836
163091
    input = p;
837
163091
    len = end - p;
838
  }
839
840
  // The spec says we should strip out any ASCII tabs or newlines.
841
  // In those cases, we create another std::string instance with the filtered
842
  // contents, but in the general case we avoid the overhead.
843
250098
  std::string whitespace_stripped;
844
16308627
  for (const char* ptr = p; ptr < end; ptr++) {
845
16058699
    if (!IsASCIITabOrNewline(*ptr))
846
16058529
      continue;
847
    // Hit tab or newline. Allocate storage, copy what we have until now,
848
    // and then iterate and filter all similar characters out.
849
170
    whitespace_stripped.reserve(len - 1);
850
170
    whitespace_stripped.assign(p, ptr - p);
851
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
852
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
853
853
      if (!IsASCIITabOrNewline(*ptr))
854
769
        whitespace_stripped += *ptr;
855
    }
856
857
    // Update variables like they should have looked like if the string
858
    // had been stripped of whitespace to begin with.
859
170
    input = whitespace_stripped.c_str();
860
170
    len = whitespace_stripped.size();
861
170
    p = input;
862
170
    end = input + len;
863
170
    break;
864
  }
865
866
250098
  bool atflag = false;  // Set when @ has been seen.
867
250098
  bool square_bracket_flag = false;  // Set inside of [...]
868
250098
  bool password_token_seen_flag = false;  // Set after a : after an username.
869
870
250098
  std::string buffer;
871
872
  // Set the initial parse state.
873
250098
  const bool has_state_override = state_override != kUnknownState;
874
250098
  enum url_parse_state state = has_state_override ? state_override :
875
                                                    kSchemeStart;
876
877

250098
  if (state < kSchemeStart || state > kFragment) {
878
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
879
    return;
880
  }
881
882
16877183
  while (p <= end) {
883
16635852
    const char ch = p < end ? p[0] : kEOL;
884
16635852
    bool special = (url->flags & URL_FLAGS_SPECIAL);
885
    bool cannot_be_base;
886

16635852
    bool special_back_slash = (special && ch == '\\');
887
888





16635852
    switch (state) {
889
163174
      case kSchemeStart:
890
163174
        if (IsASCIIAlpha(ch)) {
891
149682
          buffer += ASCIILowercase(ch);
892
149682
          state = kScheme;
893
13492
        } else if (!has_state_override) {
894
13482
          state = kNoScheme;
895
13482
          continue;
896
        } else {
897
10
          url->flags |= URL_FLAGS_FAILED;
898
10
          return;
899
        }
900
149682
        break;
901
617344
      case kScheme:
902


617344
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
903
467662
          buffer += ASCIILowercase(ch);
904

149682
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
905

147698
          if (has_state_override && buffer.size() == 0) {
906
            url->flags |= URL_FLAGS_TERMINATED;
907
            return;
908
          }
909
147698
          buffer += ':';
910
911
147698
          bool new_is_special = IsSpecial(buffer);
912
913
147698
          if (has_state_override) {
914
45
            if ((special != new_is_special) ||
915
45
                ((buffer == "file:") &&
916
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
917
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
918


116
                  (url->port != -1))) ||
919
45
                  (url->scheme == "file:" && url->host.empty())) {
920
32
              url->flags |= URL_FLAGS_TERMINATED;
921
32
              return;
922
            }
923
          }
924
925
147666
          url->scheme = std::move(buffer);
926
147666
          url->port = NormalizePort(url->scheme, url->port);
927
147666
          if (new_is_special) {
928
142022
            url->flags |= URL_FLAGS_SPECIAL;
929
142022
            special = true;
930
          } else {
931
5644
            url->flags &= ~URL_FLAGS_SPECIAL;
932
5644
            special = false;
933
          }
934

147666
          special_back_slash = (special && ch == '\\');
935
147666
          buffer.clear();
936
147666
          if (has_state_override)
937
33
            return;
938
147633
          if (url->scheme == "file:") {
939
138354
            state = kFile;
940
3651
          } else if (special &&
941

12930
                     has_base &&
942
1033
                     url->scheme == base->scheme) {
943
323
            state = kSpecialRelativeOrAuthority;
944
8956
          } else if (special) {
945
3328
            state = kSpecialAuthoritySlashes;
946

5628
          } else if (p + 1 < end && p[1] == '/') {
947
716
            state = kPathOrAuthority;
948
716
            p++;
949
          } else {
950
4912
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
951
4912
            url->flags |= URL_FLAGS_HAS_PATH;
952
4912
            url->path.emplace_back("");
953
4912
            state = kCannotBeBase;
954
147633
          }
955
1984
        } else if (!has_state_override) {
956
1976
          buffer.clear();
957
1976
          state = kNoScheme;
958
1976
          p = input;
959
1976
          continue;
960
        } else {
961
8
          url->flags |= URL_FLAGS_FAILED;
962
8
          return;
963
        }
964
615295
        break;
965
15458
      case kNoScheme:
966

15458
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
967

15458
        if (!has_base || (cannot_be_base && ch != '#')) {
968
7783
          url->flags |= URL_FLAGS_FAILED;
969
7783
          return;
970

7675
        } else if (cannot_be_base && ch == '#') {
971
28
          url->scheme = base->scheme;
972
28
          if (IsSpecial(url->scheme)) {
973
            url->flags |= URL_FLAGS_SPECIAL;
974
            special = true;
975
          } else {
976
28
            url->flags &= ~URL_FLAGS_SPECIAL;
977
28
            special = false;
978
          }
979

28
          special_back_slash = (special && ch == '\\');
980
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
981
28
            url->flags |= URL_FLAGS_HAS_PATH;
982
28
            url->path = base->path;
983
          }
984
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
985
4
            url->flags |= URL_FLAGS_HAS_QUERY;
986
4
            url->query = base->query;
987
          }
988
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
989
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
990
            url->fragment = base->fragment;
991
          }
992
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
993
28
          state = kFragment;
994

15294
        } else if (has_base &&
995
7647
                   base->scheme != "file:") {
996
371
          state = kRelative;
997
371
          continue;
998
        } else {
999
7276
          url->scheme = "file:";
1000
7276
          url->flags |= URL_FLAGS_SPECIAL;
1001
7276
          special = true;
1002
7276
          state = kFile;
1003

7276
          special_back_slash = (special && ch == '\\');
1004
7276
          continue;
1005
        }
1006
28
        break;
1007
323
      case kSpecialRelativeOrAuthority:
1008

323
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1009
291
          state = kSpecialAuthorityIgnoreSlashes;
1010
291
          p++;
1011
        } else {
1012
32
          state = kRelative;
1013
32
          continue;
1014
        }
1015
291
        break;
1016
716
      case kPathOrAuthority:
1017
716
        if (ch == '/') {
1018
548
          state = kAuthority;
1019
        } else {
1020
168
          state = kPath;
1021
168
          continue;
1022
        }
1023
548
        break;
1024
403
      case kRelative:
1025
403
        url->scheme = base->scheme;
1026
403
        if (IsSpecial(url->scheme)) {
1027
303
          url->flags |= URL_FLAGS_SPECIAL;
1028
303
          special = true;
1029
        } else {
1030
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1031
100
          special = false;
1032
        }
1033

403
        special_back_slash = (special && ch == '\\');
1034

403
        switch (ch) {
1035
18
          case kEOL:
1036
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1037
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1038
4
              url->username = base->username;
1039
            }
1040
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1041
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1042
4
              url->password = base->password;
1043
            }
1044
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1045
16
              url->flags |= URL_FLAGS_HAS_HOST;
1046
16
              url->host = base->host;
1047
            }
1048
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1049
              url->flags |= URL_FLAGS_HAS_QUERY;
1050
              url->query = base->query;
1051
            }
1052
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1053
18
              url->flags |= URL_FLAGS_HAS_PATH;
1054
18
              url->path = base->path;
1055
            }
1056
18
            url->port = base->port;
1057
18
            break;
1058
124
          case '/':
1059
124
            state = kRelativeSlash;
1060
124
            break;
1061
38
          case '?':
1062
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1063
              url->flags |= URL_FLAGS_HAS_USERNAME;
1064
              url->username = base->username;
1065
            }
1066
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1067
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1068
              url->password = base->password;
1069
            }
1070
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1071
34
              url->flags |= URL_FLAGS_HAS_HOST;
1072
34
              url->host = base->host;
1073
            }
1074
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1075
38
              url->flags |= URL_FLAGS_HAS_PATH;
1076
38
              url->path = base->path;
1077
            }
1078
38
            url->port = base->port;
1079
38
            state = kQuery;
1080
38
            break;
1081
38
          case '#':
1082
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1083
              url->flags |= URL_FLAGS_HAS_USERNAME;
1084
              url->username = base->username;
1085
            }
1086
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1087
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1088
              url->password = base->password;
1089
            }
1090
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1091
34
              url->flags |= URL_FLAGS_HAS_HOST;
1092
34
              url->host = base->host;
1093
            }
1094
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1095
              url->flags |= URL_FLAGS_HAS_QUERY;
1096
              url->query = base->query;
1097
            }
1098
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1099
38
              url->flags |= URL_FLAGS_HAS_PATH;
1100
38
              url->path = base->path;
1101
            }
1102
38
            url->port = base->port;
1103
38
            state = kFragment;
1104
38
            break;
1105
185
          default:
1106
185
            if (special_back_slash) {
1107
18
              state = kRelativeSlash;
1108
            } else {
1109
167
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1110
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1111
1
                url->username = base->username;
1112
              }
1113
167
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1114
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1115
1
                url->password = base->password;
1116
              }
1117
167
              if (base->flags & URL_FLAGS_HAS_HOST) {
1118
147
                url->flags |= URL_FLAGS_HAS_HOST;
1119
147
                url->host = base->host;
1120
              }
1121
167
              if (base->flags & URL_FLAGS_HAS_PATH) {
1122
167
                url->flags |= URL_FLAGS_HAS_PATH;
1123
167
                url->path = base->path;
1124
167
                ShortenUrlPath(url);
1125
              }
1126
167
              url->port = base->port;
1127
167
              state = kPath;
1128
167
              continue;
1129
            }
1130
        }
1131
236
        break;
1132
142
      case kRelativeSlash:
1133


142
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1134
22
          state = kSpecialAuthorityIgnoreSlashes;
1135
120
        } else if (ch == '/') {
1136
6
          state = kAuthority;
1137
        } else {
1138
114
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1139
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1140
8
            url->username = base->username;
1141
          }
1142
114
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1143
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1144
4
            url->password = base->password;
1145
          }
1146
114
          if (base->flags & URL_FLAGS_HAS_HOST) {
1147
106
            url->flags |= URL_FLAGS_HAS_HOST;
1148
106
            url->host = base->host;
1149
          }
1150
114
          url->port = base->port;
1151
114
          state = kPath;
1152
114
          continue;
1153
        }
1154
28
        break;
1155
3328
      case kSpecialAuthoritySlashes:
1156
3328
        state = kSpecialAuthorityIgnoreSlashes;
1157

3328
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1158
3179
          p++;
1159
        } else {
1160
149
          continue;
1161
        }
1162
3179
        break;
1163
3718
      case kSpecialAuthorityIgnoreSlashes:
1164

3718
        if (ch != '/' && ch != '\\') {
1165
3641
          state = kAuthority;
1166
3641
          continue;
1167
        }
1168
77
        break;
1169
90388
      case kAuthority:
1170
90388
        if (ch == '@') {
1171
565
          if (atflag) {
1172
41
            buffer.reserve(buffer.size() + 3);
1173
41
            buffer.insert(0, "%40");
1174
          }
1175
565
          atflag = true;
1176
565
          size_t blen = buffer.size();
1177

565
          if (blen > 0 && buffer[0] != ':') {
1178
469
            url->flags |= URL_FLAGS_HAS_USERNAME;
1179
          }
1180
6652
          for (size_t n = 0; n < blen; n++) {
1181
6087
            const char bch = buffer[n];
1182
6087
            if (bch == ':') {
1183
444
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1184
444
              if (!password_token_seen_flag) {
1185
428
                password_token_seen_flag = true;
1186
428
                continue;
1187
              }
1188
            }
1189
5659
            if (password_token_seen_flag) {
1190
2722
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1191
            } else {
1192
2937
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1193
            }
1194
          }
1195
565
          buffer.clear();
1196

89823
        } else if (ch == kEOL ||
1197
85698
                   ch == '/' ||
1198
85666
                   ch == '?' ||
1199
85648
                   ch == '#' ||
1200
                   special_back_slash) {
1201

4195
          if (atflag && buffer.size() == 0) {
1202
52
            url->flags |= URL_FLAGS_FAILED;
1203
52
            return;
1204
          }
1205
4143
          p -= buffer.size() + 1;
1206
4143
          buffer.clear();
1207
4143
          state = kHost;
1208
        } else {
1209
85628
          buffer += ch;
1210
        }
1211
90336
        break;
1212
83081
      case kHost:
1213
      case kHostname:
1214

83081
        if (has_state_override && url->scheme == "file:") {
1215
12
          state = kFileHost;
1216
12
          continue;
1217

83069
        } else if (ch == ':' && !square_bracket_flag) {
1218
1382
          if (buffer.size() == 0) {
1219
24
            url->flags |= URL_FLAGS_FAILED;
1220
24
            return;
1221
          }
1222
1358
          if (state_override == kHostname) {
1223
4
            return;
1224
          }
1225
1354
          url->flags |= URL_FLAGS_HAS_HOST;
1226
1354
          if (!ParseHost(buffer, &url->host, special)) {
1227
5
            url->flags |= URL_FLAGS_FAILED;
1228
5
            return;
1229
          }
1230
1349
          buffer.clear();
1231
1349
          state = kPort;
1232

81687
        } else if (ch == kEOL ||
1233
78636
                   ch == '/' ||
1234
78596
                   ch == '?' ||
1235
78570
                   ch == '#' ||
1236
                   special_back_slash) {
1237
3141
          p--;
1238

3141
          if (special && buffer.size() == 0) {
1239
21
            url->flags |= URL_FLAGS_FAILED;
1240
21
            return;
1241
          }
1242
331
          if (has_state_override &&
1243

3489
              buffer.size() == 0 &&
1244
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1245
38
               url->port != -1)) {
1246
8
            url->flags |= URL_FLAGS_TERMINATED;
1247
8
            return;
1248
          }
1249
3112
          url->flags |= URL_FLAGS_HAS_HOST;
1250
3112
          if (!ParseHost(buffer, &url->host, special)) {
1251
432
            url->flags |= URL_FLAGS_FAILED;
1252
432
            return;
1253
          }
1254
2680
          buffer.clear();
1255
2680
          state = kPathStart;
1256
2680
          if (has_state_override) {
1257
227
            return;
1258
          }
1259
        } else {
1260
78546
          if (ch == '[')
1261
277
            square_bracket_flag = true;
1262
78546
          if (ch == ']')
1263
273
            square_bracket_flag = false;
1264
78546
          buffer += ch;
1265
        }
1266
82348
        break;
1267
7741
      case kPort:
1268
7741
        if (IsASCIIDigit(ch)) {
1269
6323
          buffer += ch;
1270

1418
        } else if (has_state_override ||
1271
902
                   ch == kEOL ||
1272
36
                   ch == '/' ||
1273
36
                   ch == '?' ||
1274
36
                   ch == '#' ||
1275
                   special_back_slash) {
1276
1382
          if (buffer.size() > 0) {
1277
1368
            unsigned port = 0;
1278
            // the condition port <= 0xffff prevents integer overflow
1279

7475
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1280
6107
              port = port * 10 + buffer[i] - '0';
1281
1368
            if (port > 0xffff) {
1282
              // TODO(TimothyGu): This hack is currently needed for the host
1283
              // setter since it needs access to hostname if it is valid, and
1284
              // if the FAILED flag is set the entire response to JS layer
1285
              // will be empty.
1286
26
              if (state_override == kHost)
1287
2
                url->port = -1;
1288
              else
1289
24
                url->flags |= URL_FLAGS_FAILED;
1290
26
              return;
1291
            }
1292
            // the port is valid
1293
1342
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1294
1342
            if (url->port == -1)
1295
47
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1296
1342
            buffer.clear();
1297
14
          } else if (has_state_override) {
1298
            // TODO(TimothyGu): Similar case as above.
1299
6
            if (state_override == kHost)
1300
2
              url->port = -1;
1301
            else
1302
4
              url->flags |= URL_FLAGS_TERMINATED;
1303
6
            return;
1304
          }
1305
1350
          state = kPathStart;
1306
1350
          continue;
1307
        } else {
1308
36
          url->flags |= URL_FLAGS_FAILED;
1309
36
          return;
1310
        }
1311
6323
        break;
1312
145630
      case kFile:
1313
145630
        url->scheme = "file:";
1314
145630
        url->host.clear();
1315
145630
        url->flags |= URL_FLAGS_HAS_HOST;
1316

145630
        if (ch == '/' || ch == '\\') {
1317
138472
          state = kFileSlash;
1318

7158
        } else if (has_base && base->scheme == "file:") {
1319

7139
          switch (ch) {
1320
4
            case kEOL:
1321
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1322
4
                url->host = base->host;
1323
              }
1324
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1325
4
                url->flags |= URL_FLAGS_HAS_PATH;
1326
4
                url->path = base->path;
1327
              }
1328
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1329
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1330
4
                url->query = base->query;
1331
              }
1332
4
              break;
1333
4
            case '?':
1334
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1335
4
                url->host = base->host;
1336
              }
1337
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1338
4
                url->flags |= URL_FLAGS_HAS_PATH;
1339
4
                url->path = base->path;
1340
              }
1341
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1342
4
              url->query.clear();
1343
4
              state = kQuery;
1344
4
              break;
1345
4
            case '#':
1346
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1347
4
                url->host = base->host;
1348
              }
1349
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1350
4
                url->flags |= URL_FLAGS_HAS_PATH;
1351
4
                url->path = base->path;
1352
              }
1353
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1354
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1355
4
                url->query = base->query;
1356
              }
1357
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1358
4
              url->fragment.clear();
1359
4
              state = kFragment;
1360
4
              break;
1361
7127
            default:
1362
7127
              url->query.clear();
1363
7127
              if (base->flags & URL_FLAGS_HAS_HOST) {
1364
7127
                url->host = base->host;
1365
              }
1366
7127
              if (base->flags & URL_FLAGS_HAS_PATH) {
1367
7127
                url->flags |= URL_FLAGS_HAS_PATH;
1368
7127
                url->path = base->path;
1369
              }
1370
7127
              if (!StartsWithWindowsDriveLetter(p, end)) {
1371
7103
                ShortenUrlPath(url);
1372
              } else {
1373
24
                url->path.clear();
1374
              }
1375
7127
              state = kPath;
1376
7127
              continue;
1377
          }
1378
        } else {
1379
19
          state = kPath;
1380
19
          continue;
1381
        }
1382
138484
        break;
1383
138472
      case kFileSlash:
1384

138472
        if (ch == '/' || ch == '\\') {
1385
138292
          state = kFileHost;
1386
        } else {
1387

180
          if (has_base && base->scheme == "file:") {
1388
128
            url->flags |= URL_FLAGS_HAS_HOST;
1389
128
            url->host = base->host;
1390

242
            if (!StartsWithWindowsDriveLetter(p, end) &&
1391
114
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1392
4
              url->flags |= URL_FLAGS_HAS_PATH;
1393
4
              url->path.push_back(base->path[0]);
1394
            }
1395
          }
1396
180
          state = kPath;
1397
180
          continue;
1398
        }
1399
138292
        break;
1400
139409
      case kFileHost:
1401

139409
        if (ch == kEOL ||
1402
1115
            ch == '/' ||
1403
1105
            ch == '\\' ||
1404
1105
            ch == '?' ||
1405
            ch == '#') {
1406
138292
          if (!has_state_override &&
1407

276596
              buffer.size() == 2 &&
1408
22
              IsWindowsDriveLetter(buffer)) {
1409
12
            state = kPath;
1410
138292
          } else if (buffer.size() == 0) {
1411
138099
            url->flags |= URL_FLAGS_HAS_HOST;
1412
138099
            url->host.clear();
1413
138099
            if (has_state_override)
1414
4
              return;
1415
138095
            state = kPathStart;
1416
          } else {
1417
193
            std::string host;
1418
193
            if (!ParseHost(buffer, &host, special)) {
1419
52
              url->flags |= URL_FLAGS_FAILED;
1420
52
              return;
1421
            }
1422
141
            if (host == "localhost")
1423
37
              host.clear();
1424
141
            url->flags |= URL_FLAGS_HAS_HOST;
1425
141
            url->host = host;
1426
141
            if (has_state_override)
1427
4
              return;
1428
137
            buffer.clear();
1429
137
            state = kPathStart;
1430
          }
1431
138244
          continue;
1432
        } else {
1433
1105
          buffer += ch;
1434
        }
1435
1105
        break;
1436
228265
      case kPathStart:
1437
228265
        if (IsSpecial(url->scheme)) {
1438
227717
          state = kPath;
1439

227717
          if (ch != '/' && ch != '\\') {
1440
86965
            continue;
1441
          }
1442

548
        } else if (!has_state_override && ch == '?') {
1443
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1444
6
          url->query.clear();
1445
6
          state = kQuery;
1446

542
        } else if (!has_state_override && ch == '#') {
1447
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1448
6
          url->fragment.clear();
1449
6
          state = kFragment;
1450
536
        } else if (ch != kEOL) {
1451
459
          state = kPath;
1452
459
          if (ch != '/') {
1453
35
            continue;
1454
          }
1455

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1456
2
          url->flags |= URL_FLAGS_HAS_PATH;
1457
2
          url->path.emplace_back("");
1458
        }
1459
141265
        break;
1460
14905692
      case kPath:
1461

14905692
        if (ch == kEOL ||
1462
13470263
            ch == '/' ||
1463
13470193
            special_back_slash ||
1464

13470193
            (!has_state_override && (ch == '?' || ch == '#'))) {
1465
1436362
          if (IsDoubleDotSegment(buffer)) {
1466
4378
            ShortenUrlPath(url);
1467

4378
            if (ch != '/' && !special_back_slash) {
1468
280
              url->flags |= URL_FLAGS_HAS_PATH;
1469
280
              url->path.emplace_back("");
1470
            }
1471
1435708
          } else if (IsSingleDotSegment(buffer) &&
1472

1435708
                     ch != '/' && !special_back_slash) {
1473
1040
            url->flags |= URL_FLAGS_HAS_PATH;
1474
1040
            url->path.emplace_back("");
1475
1430944
          } else if (!IsSingleDotSegment(buffer)) {
1476
2851055
            if (url->scheme == "file:" &&
1477
1608132
                url->path.empty() &&
1478

3036392
                buffer.size() == 2 &&
1479
100
                IsWindowsDriveLetter(buffer)) {
1480
98
              buffer[1] = ':';
1481
            }
1482
1428260
            url->flags |= URL_FLAGS_HAS_PATH;
1483
1428260
            url->path.emplace_back(std::move(buffer));
1484
          }
1485
1436362
          buffer.clear();
1486
2872724
          if (ch == '?') {
1487
808
            url->flags |= URL_FLAGS_HAS_QUERY;
1488
808
            url->query.clear();
1489
808
            state = kQuery;
1490
1435554
          } else if (ch == '#') {
1491
55
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1492
55
            url->fragment.clear();
1493
55
            state = kFragment;
1494
          }
1495
        } else {
1496
13469330
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1497
        }
1498
14905692
        break;
1499
48241
      case kCannotBeBase:
1500
48241
        switch (ch) {
1501
4
          case '?':
1502
4
            state = kQuery;
1503
4
            break;
1504
10
          case '#':
1505
10
            state = kFragment;
1506
10
            break;
1507
48227
          default:
1508
48227
            if (url->path.empty())
1509
              url->path.emplace_back("");
1510
48227
            else if (ch != kEOL)
1511
43329
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1512
        }
1513
48241
        break;
1514
40072
      case kQuery:
1515

40072
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1516
1019
          url->flags |= URL_FLAGS_HAS_QUERY;
1517
1019
          url->query = std::move(buffer);
1518
1019
          buffer.clear();
1519
1408
          if (ch == '#')
1520
389
            state = kFragment;
1521
        } else {
1522
39053
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1523
                                                QUERY_ENCODE_SET_NONSPECIAL);
1524
        }
1525
40072
        break;
1526
4255
      case kFragment:
1527
4255
        switch (ch) {
1528
604
          case kEOL:
1529
604
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1530
604
            url->fragment = std::move(buffer);
1531
604
            break;
1532
3651
          default:
1533
3651
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1534
        }
1535
4255
        break;
1536
      default:
1537
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1538
        return;
1539
    }
1540
1541
16365777
    p++;
1542
  }
1543
}  // NOLINT(readability/fn_size)
1544
1545
// https://url.spec.whatwg.org/#url-serializing
1546
39720
std::string URL::SerializeURL(const struct url_data* url,
1547
                              bool exclude = false) {
1548
39720
  std::string output = url->scheme;
1549
39720
  if (url->flags & URL_FLAGS_HAS_HOST) {
1550
39720
    output += "//";
1551
39720
    if (url->flags & URL_FLAGS_HAS_USERNAME ||
1552
39720
        url->flags & URL_FLAGS_HAS_PASSWORD) {
1553
      if (url->flags & URL_FLAGS_HAS_USERNAME) {
1554
        output += url->username;
1555
      }
1556
      if (url->flags & URL_FLAGS_HAS_PASSWORD) {
1557
        output += ":" + url->password;
1558
      }
1559
      output += "@";
1560
    }
1561
39720
    output += url->host;
1562
39720
    if (url->port != -1) {
1563
      output += ":" + std::to_string(url->port);
1564
    }
1565
  }
1566
39720
  if (url->flags & URL_FLAGS_CANNOT_BE_BASE) {
1567
    output += url->path[0];
1568
  } else {
1569
    if (!(url->flags & URL_FLAGS_HAS_HOST) &&
1570

39720
          url->path.size() > 1 &&
1571
          url->path[0].empty()) {
1572
      output += "/.";
1573
    }
1574
428447
    for (size_t i = 1; i < url->path.size(); i++) {
1575
388727
      output += "/" + url->path[i];
1576
    }
1577
  }
1578
39720
  if (url->flags & URL_FLAGS_HAS_QUERY) {
1579
    output = "?" + url->query;
1580
  }
1581

39720
  if (!exclude && url->flags & URL_FLAGS_HAS_FRAGMENT) {
1582
    output = "#" + url->fragment;
1583
  }
1584
39720
  return output;
1585
}
1586
1587
namespace {
1588
162142
void SetArgs(Environment* env,
1589
             Local<Value> argv[ARG_COUNT],
1590
             const struct url_data& url) {
1591
162142
  Isolate* isolate = env->isolate();
1592
162142
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1593
324284
  argv[ARG_PROTOCOL] =
1594
162142
      url.flags & URL_FLAGS_SPECIAL ?
1595
156273
          GetSpecial(env, url.scheme) :
1596
5869
          OneByteString(isolate, url.scheme.c_str());
1597
162142
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1598
1224
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1599
162142
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1600
1184
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1601
162142
  if (url.flags & URL_FLAGS_HAS_HOST)
1602
313938
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1603
162142
  if (url.flags & URL_FLAGS_HAS_QUERY)
1604
2054
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1605
162142
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1606
1200
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1607
162142
  if (url.port > -1)
1608
2928
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1609
162142
  if (url.flags & URL_FLAGS_HAS_PATH)
1610
323092
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1611
162142
}
1612
1613
170629
void Parse(Environment* env,
1614
           Local<Value> recv,
1615
           const char* input,
1616
           size_t len,
1617
           enum url_parse_state state_override,
1618
           Local<Value> base_obj,
1619
           Local<Value> context_obj,
1620
           Local<Function> cb,
1621
           Local<Value> error_cb) {
1622
170629
  Isolate* isolate = env->isolate();
1623
170629
  Local<Context> context = env->context();
1624
170629
  HandleScope handle_scope(isolate);
1625
170629
  Context::Scope context_scope(context);
1626
1627
170629
  const bool has_context = context_obj->IsObject();
1628
170629
  const bool has_base = base_obj->IsObject();
1629
1630
170629
  url_data base;
1631
170629
  url_data url;
1632
170629
  if (has_context)
1633
47287
    url = HarvestContext(env, context_obj.As<Object>());
1634
170629
  if (has_base)
1635
9349
    base = HarvestBase(env, base_obj.As<Object>());
1636
1637
170629
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1638

170629
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1639
47287
      ((state_override != kUnknownState) &&
1640
47287
       (url.flags & URL_FLAGS_TERMINATED)))
1641
44
    return;
1642
1643
  // Define the return value placeholders
1644
170585
  const Local<Value> undef = Undefined(isolate);
1645
170585
  const Local<Value> null = Null(isolate);
1646
170585
  if (!(url.flags & URL_FLAGS_FAILED)) {
1647
    Local<Value> argv[] = {
1648
      undef,
1649
      undef,
1650
      undef,
1651
      undef,
1652
      null,  // host defaults to null
1653
      null,  // port defaults to null
1654
      undef,
1655
      null,  // query defaults to null
1656
      null,  // fragment defaults to null
1657
162142
    };
1658
162142
    SetArgs(env, argv, url);
1659
324284
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
1660
8443
  } else if (error_cb->IsFunction()) {
1661
8313
    Local<Value> argv[2] = { undef, undef };
1662
8313
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1663
8313
    argv[ERR_ARG_INPUT] =
1664
16626
      String::NewFromUtf8(env->isolate(), input).ToLocalChecked();
1665
8313
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
1666
8313
        .FromMaybe(Local<Value>());
1667
  }
1668
}
1669
1670
170629
void Parse(const FunctionCallbackInfo<Value>& args) {
1671
170629
  Environment* env = Environment::GetCurrent(args);
1672
170629
  CHECK_GE(args.Length(), 5);
1673
341258
  CHECK(args[0]->IsString());  // input
1674


463879
  CHECK(args[2]->IsUndefined() ||  // base context
1675
        args[2]->IsNull() ||
1676
        args[2]->IsObject());
1677


483119
  CHECK(args[3]->IsUndefined() ||  // context
1678
        args[3]->IsNull() ||
1679
        args[3]->IsObject());
1680
170629
  CHECK(args[4]->IsFunction());  // complete callback
1681

464600
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1682
1683
170629
  Utf8Value input(env->isolate(), args[0]);
1684
170629
  enum url_parse_state state_override = kUnknownState;
1685
170629
  if (args[1]->IsNumber()) {
1686
170629
    state_override = static_cast<enum url_parse_state>(
1687
341258
        args[1]->Uint32Value(env->context()).FromJust());
1688
  }
1689
1690
341258
  Parse(env, args.This(),
1691
170629
        *input, input.length(),
1692
        state_override,
1693
        args[2],
1694
        args[3],
1695
341258
        args[4].As<Function>(),
1696
        args[5]);
1697
170629
}
1698
1699
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1700
92
  Environment* env = Environment::GetCurrent(args);
1701
92
  CHECK_GE(args.Length(), 1);
1702
184
  CHECK(args[0]->IsString());
1703
184
  Utf8Value value(env->isolate(), args[0]);
1704
92
  std::string output;
1705
92
  size_t len = value.length();
1706
92
  output.reserve(len);
1707
756
  for (size_t n = 0; n < len; n++) {
1708
664
    const char ch = (*value)[n];
1709
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1710
  }
1711
276
  args.GetReturnValue().Set(
1712
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1713
92
}
1714
1715
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1716
229
  Environment* env = Environment::GetCurrent(args);
1717
229
  CHECK_GE(args.Length(), 1);
1718
458
  CHECK(args[0]->IsString());
1719
229
  Utf8Value value(env->isolate(), args[0]);
1720
1721
229
  URLHost host;
1722
  // Assuming the host is used for a special scheme.
1723
229
  host.ParseHost(*value, value.length(), true);
1724
229
  if (host.ParsingFailed()) {
1725
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1726
12
    return;
1727
  }
1728
217
  std::string out = host.ToStringMove();
1729
651
  args.GetReturnValue().Set(
1730
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1731
}
1732
1733
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1734
207
  Environment* env = Environment::GetCurrent(args);
1735
207
  CHECK_GE(args.Length(), 1);
1736
414
  CHECK(args[0]->IsString());
1737
207
  Utf8Value value(env->isolate(), args[0]);
1738
1739
207
  URLHost host;
1740
  // Assuming the host is used for a special scheme.
1741
207
  host.ParseHost(*value, value.length(), true, true);
1742
207
  if (host.ParsingFailed()) {
1743
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1744
12
    return;
1745
  }
1746
195
  std::string out = host.ToStringMove();
1747
585
  args.GetReturnValue().Set(
1748
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1749
}
1750
1751
624
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1752
624
  Environment* env = Environment::GetCurrent(args);
1753
624
  CHECK_EQ(args.Length(), 1);
1754
624
  CHECK(args[0]->IsFunction());
1755
1248
  env->set_url_constructor_function(args[0].As<Function>());
1756
624
}
1757
1758
624
// Populates `target` with this binding's native methods and constants.
// `unused` and `priv` are not referenced in the body.
void Initialize(Local<Object> target,
1759
                Local<Value> unused,
1760
                Local<Context> context,
1761
                void* priv) {
1762
624
  Environment* env = Environment::GetCurrent(context);
1763
624
  // "parse" and "setURLConstructor" are installed with SetMethod; the three
  // conversion helpers are installed with SetMethodNoSideEffect.
  env->SetMethod(target, "parse", Parse);
1764
624
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1765
624
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1766
624
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1767
624
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1768
1769
  // Apply NODE_DEFINE_CONSTANT(target, name) to every entry of the FLAGS
  // list (declared elsewhere); the second field of each entry is ignored.
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1770
16848
  FLAGS(XX)
1771
#undef XX
1772
1773
  // Same for every entry of the PARSESTATES list (declared elsewhere).
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1774
26208
  PARSESTATES(XX)
1775
#undef XX
1776
624
}
1777
}  // namespace
1778
1779
4933
// Registers every native callback exposed by this binding with `registry`,
// in the same order Initialize() installs them.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  // The explicit callback type selects the FunctionCallbackInfo overload of
  // Parse.
  const v8::FunctionCallback callbacks[] = {
    Parse,
    EncodeAuthSet,
    DomainToASCII,
    DomainToUnicode,
    SetURLConstructor,
  };
  for (const v8::FunctionCallback callback : callbacks) {
    registry->Register(callback);
  }
}
1786
1787
8
std::string URL::ToFilePath() const {
1788
8
  if (context_.scheme != "file:") {
1789
1
    return "";
1790
  }
1791
1792
#ifdef _WIN32
1793
  const char* slash = "\\";
1794
  auto is_slash = [] (char ch) {
1795
    return ch == '/' || ch == '\\';
1796
  };
1797
#else
1798
7
  const char* slash = "/";
1799
46
  auto is_slash = [] (char ch) {
1800
46
    return ch == '/';
1801
  };
1802

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1803
7
      context_.host.length() > 0) {
1804
1
    return "";
1805
  }
1806
#endif
1807
12
  std::string decoded_path;
1808
18
  for (const std::string& part : context_.path) {
1809
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1810
58
    for (char& ch : decoded) {
1811
46
      if (is_slash(ch)) {
1812
1
        return "";
1813
      }
1814
    }
1815
12
    decoded_path += slash + decoded;
1816
  }
1817
1818
#ifdef _WIN32
1819
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1820
1821
  // If hostname is set, then we have a UNC path. Pass the hostname through
1822
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1823
  // need to worry about percent encoding because the URL parser will have
1824
  // already taken care of that for us. Note that this only causes IDNs with an
1825
  // appropriate `xn--` prefix to be decoded.
1826
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1827
      context_.host.length() > 0) {
1828
    std::string unicode_host;
1829
    if (!ToUnicode(context_.host, &unicode_host)) {
1830
      return "";
1831
    }
1832
    return "\\\\" + unicode_host + decoded_path;
1833
  }
1834
  // Otherwise, it's a local path that requires a drive letter.
1835
  if (decoded_path.length() < 3) {
1836
    return "";
1837
  }
1838
  if (decoded_path[2] != ':' ||
1839
      !IsASCIIAlpha(decoded_path[1])) {
1840
    return "";
1841
  }
1842
  // Strip out the leading '\'.
1843
  return decoded_path.substr(1);
1844
#else
1845
5
  return decoded_path;
1846
#endif
1847
}
1848
1849
39720
// Builds a "file://" URL from `file_path`.
//
// Every '%' in the path is written as "%25" before parsing. The escaped path
// is then parsed into the new URL's context starting from the kPathStart
// state.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped;
  for (const char ch : file_path) {
    escaped.push_back(ch);
    if (ch == '%') {
      escaped.append("25");
    }
  }

  URL::Parse(escaped.c_str(),
             escaped.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1861
1862
// This function works by calling out to a JS function that creates and
1863
// returns the JS URL object. Be mindful of the JS<->Native boundary
1864
// crossing that is required.
1865
MaybeLocal<Value> URL::ToObject(Environment* env) const {
1866
  Isolate* isolate = env->isolate();
1867
  Local<Context> context = env->context();
1868
  Context::Scope context_scope(context);
1869
1870
  const Local<Value> undef = Undefined(isolate);
1871
  const Local<Value> null = Null(isolate);
1872
1873
  if (context_.flags & URL_FLAGS_FAILED)
1874
    return Local<Value>();
1875
1876
  Local<Value> argv[] = {
1877
    undef,
1878
    undef,
1879
    undef,
1880
    undef,
1881
    null,  // host defaults to null
1882
    null,  // port defaults to null
1883
    undef,
1884
    null,  // query defaults to null
1885
    null,  // fragment defaults to null
1886
  };
1887
  SetArgs(env, argv, context_);
1888
1889
  MaybeLocal<Value> ret;
1890
  {
1891
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
1892
1893
    // The SetURLConstructor method must have been called already to
1894
    // set the constructor function used below. SetURLConstructor is
1895
    // called automatically when the internal/url.js module is loaded
1896
    // during the internal/bootstrap/node.js processing.
1897
    ret = env->url_constructor_function()
1898
        ->Call(env->context(), undef, arraysize(argv), argv);
1899
  }
1900
1901
  return ret;
1902
}
1903
1904
}  // namespace url
1905
}  // namespace node
1906
1907
4994
// Presumably registers node::url::Initialize as the initializer of the
// internal `url` binding -- the macro is defined outside this file.
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1908
4933
// Presumably exposes this binding's external references under the same
// `url` name -- the macro is defined outside this file.
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)