GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: node_url.cc Lines: 1121 1179 95.1 %
Date: 2021-09-27 04:13:02 Branches: 973 1100 88.5 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_external_reference.h"
5
#include "node_i18n.h"
6
#include "util-inl.h"
7
8
#include <cmath>
9
#include <cstdio>
10
#include <string>
11
#include <vector>
12
13
namespace node {
14
15
using errors::TryCatchScope;
16
17
using url::table_data::hex;
18
using url::table_data::C0_CONTROL_ENCODE_SET;
19
using url::table_data::FRAGMENT_ENCODE_SET;
20
using url::table_data::PATH_ENCODE_SET;
21
using url::table_data::USERINFO_ENCODE_SET;
22
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
23
using url::table_data::QUERY_ENCODE_SET_SPECIAL;
24
25
using v8::Array;
26
using v8::Context;
27
using v8::Function;
28
using v8::FunctionCallbackInfo;
29
using v8::HandleScope;
30
using v8::Int32;
31
using v8::Integer;
32
using v8::Isolate;
33
using v8::Local;
34
using v8::MaybeLocal;
35
using v8::NewStringType;
36
using v8::Null;
37
using v8::Object;
38
using v8::String;
39
using v8::Undefined;
40
using v8::Value;
41
42
128158
// Builds a V8 string from the bytes of `str`, interpreted as UTF-8.
// The result is unwrapped with ToLocalChecked(), so an empty MaybeLocal is
// not handled gracefully here.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
48
49
namespace url {
50
namespace {
51
52
// https://url.spec.whatwg.org/#eof-code-point
53
constexpr char kEOL = -1;
54
55
// https://url.spec.whatwg.org/#concept-host
56
// Holds the result of parsing a URL host. The host is one of: a domain or an
// opaque host (both stored as a string), an IPv4 address, an IPv6 address,
// or "failed" (the initial state and the state after Reset()).
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while the host is in the H_FAILED state.
  bool ParsingFailed() const { return HostType::H_FAILED == type_; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage shared by all host kinds. The string member is constructed and
  // destroyed manually (see SetOpaque()/SetDomain()/Reset()), which is why
  // the union's own constructor and destructor never touch it.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    Value() : ipv4(0) {}
    ~Value() {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // True when `value_.domain_or_opaque` currently holds a live std::string.
  bool HoldsString() const {
    return type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
  }

  // Destroys the string member if it is live and returns to H_FAILED.
  void Reset() {
    if (HoldsString()) {
      using string = std::string;
      value_.domain_or_opaque.~string();
    }
    type_ = HostType::H_FAILED;
  }

  // Setting the string member of the union with = is brittle because it
  // relies on the member already being in a state that needs no destruction
  // of old data. For a long time that worked well enough because
  // ParseIPv6Host() happened to zero-fill `value_`, but that relies on
  // standard library internals too much.
  // These helpers are the easiest solution, but we might want to consider
  // just not forcing strings into a union.
  void SetOpaque(std::string&& string) {
    Reset();
    type_ = HostType::H_OPAQUE;
    new (&value_.domain_or_opaque) std::string(std::move(string));
  }

  void SetDomain(std::string&& string) {
    Reset();
    type_ = HostType::H_DOMAIN;
    new (&value_.domain_or_opaque) std::string(std::move(string));
  }
};

// Releases the string member, if any, through Reset().
URLHost::~URLHost() { Reset(); }
131
132
// X-macro listing the enumerators of url_cb_args in order.
// NOTE(review): presumably these index the arguments handed to the URL
// parse callback; the use sites are outside this view.
#define ARGS(ENTRY)                                                           \
  ENTRY(ARG_FLAGS)                                                            \
  ENTRY(ARG_PROTOCOL)                                                         \
  ENTRY(ARG_USERNAME)                                                         \
  ENTRY(ARG_PASSWORD)                                                         \
  ENTRY(ARG_HOST)                                                             \
  ENTRY(ARG_PORT)                                                             \
  ENTRY(ARG_PATH)                                                             \
  ENTRY(ARG_QUERY)                                                            \
  ENTRY(ARG_FRAGMENT)                                                         \
  ENTRY(ARG_COUNT)  // This one has to be last.

// X-macro listing the enumerators of url_error_cb_args in order.
#define ERR_ARGS(ENTRY)                                                       \
  ENTRY(ERR_ARG_FLAGS)                                                        \
  ENTRY(ERR_ARG_INPUT)

// ARG_FLAGS == 0 ... ARG_COUNT == number of real entries.
enum url_cb_args {
#define DECLARE_ENUMERATOR(name) name,
  ARGS(DECLARE_ENUMERATOR)
#undef DECLARE_ENUMERATOR
};

// ERR_ARG_FLAGS == 0, ERR_ARG_INPUT == 1.
enum url_error_cb_args {
#define DECLARE_ENUMERATOR(name) name,
  ERR_ARGS(DECLARE_ENUMERATOR)
#undef DECLARE_ENUMERATOR
};
159
160
// Generates two overloads of a predicate called `name`:
//   - name(ch1, ch2): evaluates `expr`, which may refer to `ch1` and `ch2`;
//   - name(string):   true when the string has at least two characters and
//                     name(string[0], string[1]) holds.
// Both overloads statically assert that the character type is at least
// `bits` bits wide.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename CharT>                                                   \
  bool name(const CharT ch1, const CharT ch2) {                               \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be wider than " #bits " bits");             \
    return (expr);                                                            \
  }                                                                           \
  template <typename CharT>                                                   \
  bool name(const std::basic_string<CharT>& text) {                           \
    static_assert(sizeof(text[0]) >= (bits) / 8,                              \
                  "Character must be wider than " #bits " bits");             \
    return text.size() > 1 && name(text[0], text[1]);                         \
  }
173
174
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
175

12806786
// Single-character predicates. CHAR_TEST is defined outside this view; each
// expression below is written in terms of the character `ch`.

// Tab, line feed or carriage return.
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\r' || ch == '\n'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, ('\0' <= ch && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, ('0' <= ch && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               ('A' <= ch && ch <= 'F') ||
                               ('a' <= ch && ch <= 'f')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, (('A' <= ch && ch <= 'Z') ||
                            ('a' <= ch && ch <= 'z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))
194
195
// https://infra.spec.whatwg.org/#ascii-lowercase
196
// Returns `ch` with bit 0x20 set when it is an ASCII letter (which maps
// 'A'-'Z' onto 'a'-'z' and leaves lowercase letters alone); any other
// character is returned unchanged.
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}
200
201
// https://url.spec.whatwg.org/#forbidden-host-code-point
202









86421
// True for NUL, tab, LF, CR, space and the punctuation characters
// # % / : ? @ [ \ ] < > ^ |
CHAR_TEST(8, IsForbiddenHostCodePoint,
          (ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
           ch == ' ' ||
           ch == '#' || ch == '%' || ch == '/' || ch == ':' ||
           ch == '?' || ch == '@' ||
           ch == '[' || ch == '\\' || ch == ']' ||
           ch == '<' || ch == '>' ||
           ch == '^' || ch == '|'))
208
209
// https://url.spec.whatwg.org/#windows-drive-letter
210

8528
// An ASCII letter followed by ':' or '|'.
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     ((ch2 == ':' || ch2 == '|') && IsASCIIAlpha(ch1)))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
// An ASCII letter followed by ':' only.
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (ch2 == ':' && IsASCIIAlpha(ch1)))
216
217
#undef TWO_CHAR_STRING_TEST
218
219
10781683
// Tests bit `i` of the bit set `a`: byte i / 8, bit i % 8 counted from the
// least significant bit.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i / 8];
  const int mask = 1 << (i % 8);
  return (byte & mask) != 0;
}
222
223
// Appends ch to str. If ch position in encode_set is set, the ch will
224
// be percent-encoded then appended.
225
10781683
void AppendOrEscape(std::string* str,
226
                    const unsigned char ch,
227
                    const uint8_t encode_set[]) {
228
10781683
  if (BitAt(encode_set, ch))
229
1918
    *str += hex + ch * 4;  // "%XX\0" has a length of 4
230
  else
231
10779765
    *str += ch;
232
10781683
}
233
234
// Converts one hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f') to its value
// 0-15. Any other character yields static_cast<unsigned>(-1).
template <typename T>
unsigned hex2bin(const T ch) {
  struct DigitRange {
    char first;
    char last;
    unsigned base;
  };
  static const DigitRange kRanges[] = {
    {'0', '9', 0},
    {'A', 'F', 10},
    {'a', 'f', 10},
  };
  for (const DigitRange& range : kRanges) {
    if (ch >= range.first && ch <= range.last)
      return range.base + (ch - range.first);
  }
  return static_cast<unsigned>(-1);
}
244
245
3919
std::string PercentDecode(const char* input, size_t len) {
246
3919
  std::string dest;
247
3919
  if (len == 0)
248
2
    return dest;
249
3917
  dest.reserve(len);
250
3917
  const char* pointer = input;
251
3917
  const char* end = input + len;
252
253
88888
  while (pointer < end) {
254
84971
    const char ch = pointer[0];
255
84971
    size_t remaining = end - pointer - 1;
256


85408
    if (ch != '%' || remaining < 2 ||
257
437
        (ch == '%' &&
258
437
         (!IsASCIIHexDigit(pointer[1]) ||
259
433
          !IsASCIIHexDigit(pointer[2])))) {
260
84546
      dest += ch;
261
84546
      pointer++;
262
84546
      continue;
263
    } else {
264
425
      unsigned a = hex2bin(pointer[1]);
265
425
      unsigned b = hex2bin(pointer[2]);
266
425
      char c = static_cast<char>(a * 16 + b);
267
425
      dest += c;
268
425
      pointer += 3;
269
    }
270
  }
271
3917
  return dest;
272
}
273
274
// X-macro over the special schemes: XX(key, default port, "scheme:").
// `file` has no default port, which is expressed as -1.
#define SPECIALS(XX)                                                          \
  XX(ftp, 21, "ftp:")                                                         \
  XX(file, -1, "file:")                                                       \
  XX(http, 80, "http:")                                                       \
  XX(https, 443, "https:")                                                    \
  XX(ws, 80, "ws:")                                                           \
  XX(wss, 443, "wss:")

// True when `scheme` (including its trailing ':') is one of SPECIALS.
bool IsSpecial(const std::string& scheme) {
#define V(key, port, name) || scheme == name
  return false SPECIALS(V);
#undef V
}
288
289
125005
// Returns the per-Environment string env->url_special_<key>_string() for the
// special scheme equal to `scheme`. Reaching the end without a match hits
// UNREACHABLE(), so callers are expected to pass a special scheme.
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define V(key, _, name)                                                       \
  if (scheme == name) {                                                       \
    return env->url_special_##key##_string();                                 \
  }
  SPECIALS(V)
#undef V
  UNREACHABLE();
}
296
297
120842
int NormalizePort(const std::string& scheme, int p) {
298
#define V(_, port, name) if (scheme == name && p == port) return -1;
299









120842
  SPECIALS(V);
300
#undef V
301
8896
  return p;
302
}
303
304
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
305
4392
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
306
4392
  size_t length = end - p;
307
4020
  return length >= 2 &&
308

8448
    IsWindowsDriveLetter(p[0], p[1]) &&
309
36
    (length == 2 ||
310
36
      p[2] == '/' ||
311
14
      p[2] == '\\' ||
312
6
      p[2] == '?' ||
313
4396
      p[2] == '#');
314
}
315
316
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs i18n::ToUnicode() on `input` and stores the result in `*output`.
// Returns false (leaving `*output` untouched) when the conversion reports a
// negative status.
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> buf;
  const auto status = i18n::ToUnicode(&buf, input.c_str(), input.length());
  if (status < 0)
    return false;
  output->assign(*buf, buf.length());
  return true;
}

// Runs i18n::ToASCII() on `input` and stores the result in `*output`.
// Returns false (leaving `*output` untouched) when the conversion reports a
// negative status or produces an empty result.
bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> buf;
  const auto status = i18n::ToASCII(&buf, input.c_str(), input.length());
  if (status < 0 || buf.length() == 0)
    return false;
  output->assign(*buf, buf.length());
  return true;
}
#else
// Intentional no-ops if ICU is not present: the input is copied through
// unchanged and the call always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
  *output = input;
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  *output = input;
  return true;
}
#endif
346
347
#define NS_IN6ADDRSZ 16
348
349
137
void URLHost::ParseIPv6Host(const char* input, size_t length) {
350
137
  CHECK_EQ(type_, HostType::H_FAILED);
351
352
  unsigned char buf[sizeof(struct in6_addr)];
353
137
  MaybeStackBuffer<char> ipv6(length + 1);
354
137
  *(*ipv6 + length) = 0;
355
137
  memset(buf, 0, sizeof(buf));
356
137
  memcpy(*ipv6, input, sizeof(const char) * length);
357
358
137
  int ret = uv_inet_pton(AF_INET6, *ipv6, buf);
359
360
137
  if (ret != 0) {
361
92
    return;
362
  }
363
364
  // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119
365
405
  for (int i = 0; i < NS_IN6ADDRSZ; i += 2) {
366
360
    value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1];
367
  }
368
369
45
  type_ = HostType::H_IPV6;
370
}
371
372
3787
int64_t ParseNumber(const char* start, const char* end) {
373
3787
  unsigned R = 10;
374

3787
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
375
48
    start += 2;
376
48
    R = 16;
377
  }
378
3787
  if (end - start == 0) {
379
8
    return 0;
380

3779
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
381
55
    start++;
382
55
    R = 8;
383
  }
384
3779
  const char* p = start;
385
386
4954
  while (p < end) {
387
4632
    const char ch = p[0];
388

4632
    switch (R) {
389
274
      case 8:
390

274
        if (ch < '0' || ch > '7')
391
29
          return -1;
392
245
        break;
393
4150
      case 10:
394
4150
        if (!IsASCIIDigit(ch))
395
3424
          return -1;
396
726
        break;
397
208
      case 16:
398
208
        if (!IsASCIIHexDigit(ch))
399
4
          return -1;
400
204
        break;
401
    }
402
1175
    p++;
403
  }
404
322
  return strtoll(start, nullptr, R);
405
}
406
407
3583
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
408
3583
  CHECK_EQ(type_, HostType::H_FAILED);
409
3583
  *is_ipv4 = false;
410
3583
  const char* pointer = input;
411
3583
  const char* mark = input;
412
3583
  const char* end = pointer + length;
413
3583
  int parts = 0;
414
3583
  uint32_t val = 0;
415
  uint64_t numbers[4];
416
3583
  int tooBigNumbers = 0;
417
3583
  if (length == 0)
418
3501
    return;
419
420
32869
  while (pointer <= end) {
421
32763
    const char ch = pointer < end ? pointer[0] : kEOL;
422
32763
    int64_t remaining = end - pointer - 1;
423

32763
    if (ch == '.' || ch == kEOL) {
424
3803
      if (++parts > static_cast<int>(arraysize(numbers)))
425
4
        return;
426
3799
      if (pointer == mark)
427
12
        return;
428
3787
      int64_t n = ParseNumber(mark, pointer);
429
3787
      if (n < 0)
430
3457
        return;
431
432
330
      if (n > 255) {
433
112
        tooBigNumbers++;
434
      }
435
330
      numbers[parts - 1] = n;
436
330
      mark = pointer + 1;
437

330
      if (ch == '.' && remaining == 0)
438
4
        break;
439
    }
440
29286
    pointer++;
441
  }
442
110
  CHECK_GT(parts, 0);
443
110
  *is_ipv4 = true;
444
445
  // If any but the last item in numbers is greater than 255, return failure.
446
  // If the last item in numbers is greater than or equal to
447
  // 256^(5 - the number of items in numbers), return failure.
448
106
  if (tooBigNumbers > 1 ||
449

276
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
450
102
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
451
28
    return;
452
  }
453
454
82
  type_ = HostType::H_IPV4;
455
82
  val = static_cast<uint32_t>(numbers[parts - 1]);
456
204
  for (int n = 0; n < parts - 1; n++) {
457
122
    double b = 3 - n;
458
122
    val +=
459
122
        static_cast<uint32_t>(numbers[n]) * static_cast<uint32_t>(pow(256, b));
460
  }
461
462
82
  value_.ipv4 = val;
463
}
464
465
520
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
466
520
  CHECK_EQ(type_, HostType::H_FAILED);
467
520
  std::string output;
468
520
  output.reserve(length);
469
3053
  for (size_t i = 0; i < length; i++) {
470
2595
    const char ch = input[i];
471

2595
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
472
62
      return;
473
    } else {
474
2533
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
475
    }
476
  }
477
478
458
  SetOpaque(std::move(output));
479
}
480
481
4571
void URLHost::ParseHost(const char* input,
482
                        size_t length,
483
                        bool is_special,
484
                        bool unicode) {
485
4571
  CHECK_EQ(type_, HostType::H_FAILED);
486
4571
  const char* pointer = input;
487
488
4571
  if (length == 0)
489
1098
    return;
490
491
4571
  if (pointer[0] == '[') {
492
145
    if (pointer[length - 1] != ']')
493
8
      return;
494
137
    return ParseIPv6Host(++pointer, length - 2);
495
  }
496
497
4426
  if (!is_special)
498
520
    return ParseOpaqueHost(input, length);
499
500
  // First, we have to percent decode
501
3906
  std::string decoded = PercentDecode(input, length);
502
503
  // Then we have to punycode toASCII
504
3906
  if (!ToASCII(decoded, &decoded))
505
148
    return;
506
507
  // If any of the following characters are still present, we have to fail
508
87435
  for (size_t n = 0; n < decoded.size(); n++) {
509
83852
    const char ch = decoded[n];
510
83852
    if (IsForbiddenHostCodePoint(ch)) {
511
175
      return;
512
    }
513
  }
514
515
  // Check to see if it's an IPv4 IP address
516
  bool is_ipv4;
517
3583
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
518
3583
  if (is_ipv4)
519
110
    return;
520
521
  // If the unicode flag is set, run the result through punycode ToUnicode
522

3473
  if (unicode && !ToUnicode(decoded, &decoded))
523
    return;
524
525
  // It's not an IPv4 or IPv6 address, it must be a domain
526
3473
  SetDomain(std::move(decoded));
527
}
528
529
// Locates the longest sequence of 0 segments in an IPv6 address
530
// in order to use the :: compression when serializing
531
// Returns a pointer to the first element of the longest run of zeros in
// values[0 .. len), or nullptr when no run is longer than one element.
// When several runs share the maximum length, the earliest one wins.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best_start = nullptr;
  unsigned best_length = 1;  // A run has to beat this to be reported.

  T* run_start = nullptr;
  unsigned run_length = 0;

  for (size_t i = 0; i < len; ++i) {
    T* item = values + i;
    if (*item == 0) {
      if (run_start == nullptr)
        run_start = item;
      ++run_length;
      continue;
    }
    // A non-zero element ends the current run, if any.
    if (run_length > best_length) {
      best_length = run_length;
      best_start = run_start;
    }
    run_length = 0;
    run_start = nullptr;
  }

  // The input may end while a run is still open.
  return run_length > best_length ? run_start : best_start;
}
559
560
4058
std::string URLHost::ToStringMove() {
561
4058
  std::string return_value;
562
4058
  switch (type_) {
563
3931
    case HostType::H_DOMAIN:
564
    case HostType::H_OPAQUE:
565
3931
      return_value = std::move(value_.domain_or_opaque);
566
3931
      break;
567
127
    default:
568
127
      return_value = ToString();
569
127
      break;
570
  }
571
4058
  Reset();
572
4058
  return return_value;
573
}
574
575
127
std::string URLHost::ToString() const {
576
254
  std::string dest;
577

127
  switch (type_) {
578
    case HostType::H_DOMAIN:
579
    case HostType::H_OPAQUE:
580
      return value_.domain_or_opaque;
581
82
    case HostType::H_IPV4: {
582
82
      dest.reserve(15);
583
82
      uint32_t value = value_.ipv4;
584
410
      for (int n = 0; n < 4; n++) {
585
        char buf[4];
586
328
        snprintf(buf, sizeof(buf), "%d", value % 256);
587
328
        dest.insert(0, buf);
588
328
        if (n < 3)
589
246
          dest.insert(0, 1, '.');
590
328
        value /= 256;
591
      }
592
82
      break;
593
    }
594
45
    case HostType::H_IPV6: {
595
45
      dest.reserve(41);
596
45
      dest += '[';
597
45
      const uint16_t* start = &value_.ipv6[0];
598
      const uint16_t* compress_pointer =
599
45
          FindLongestZeroSequence(start, 8);
600
45
      bool ignore0 = false;
601
405
      for (int n = 0; n <= 7; n++) {
602
360
        const uint16_t* piece = &value_.ipv6[n];
603

360
        if (ignore0 && *piece == 0)
604
245
          continue;
605
156
        else if (ignore0)
606
35
          ignore0 = false;
607
156
        if (compress_pointer == piece) {
608
41
          dest += n == 0 ? "::" : ":";
609
41
          ignore0 = true;
610
41
          continue;
611
        }
612
        char buf[5];
613
115
        snprintf(buf, sizeof(buf), "%x", *piece);
614
115
        dest += buf;
615
115
        if (n < 7)
616
76
          dest += ':';
617
      }
618
45
      dest += ']';
619
45
      break;
620
    }
621
    case HostType::H_FAILED:
622
      break;
623
  }
624
127
  return dest;
625
}
626
627
4227
bool ParseHost(const std::string& input,
628
               std::string* output,
629
               bool is_special,
630
               bool unicode = false) {
631
4227
  if (input.empty()) {
632
92
    output->clear();
633
92
    return true;
634
  }
635
8270
  URLHost host;
636
4135
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
637
4135
  if (host.ParsingFailed())
638
489
    return false;
639
3646
  *output = host.ToStringMove();
640
3646
  return true;
641
}
642
643
6424
std::vector<std::string> FromJSStringArray(Environment* env,
644
                                           Local<Array> array) {
645
6424
  std::vector<std::string> vec;
646
6424
  if (array->Length() > 0)
647
6408
    vec.reserve(array->Length());
648
88974
  for (size_t n = 0; n < array->Length(); n++) {
649
76126
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
650
76126
    if (val->IsString()) {
651
38063
      Utf8Value value(env->isolate(), val.As<String>());
652
38063
      vec.emplace_back(*value, value.length());
653
    }
654
  }
655
6424
  return vec;
656
}
657
658
6424
// Builds a url_data from the properties of the JS object `base_obj`.
// Properties are read in this order: flags, port, scheme, username,
// password, host, query, fragment, path. Every read uses ToLocalChecked(),
// so a failing property access is not tolerated.
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  auto get_property = [&](Local<String> key) -> Local<Value> {
    return base_obj->Get(env->context(), key).ToLocalChecked();
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  // The scheme is converted unconditionally, whatever its JS type.
  Local<Value> scheme = get_property(env->scheme_string());
  base.scheme = Utf8Value(env->isolate(), scheme).out();

  // Copies the string property `name` into `base.*member`. `flag` is set
  // when the property is a string that is non-empty, or a string of any
  // length when `empty_as_present` is true. Non-string values are ignored.
  auto copy_string_field = [&](std::string url_data::*member,
                               int flag,
                               Local<String> name,
                               bool empty_as_present) {
    Local<Value> value = get_property(name);
    if (!value->IsString())
      return;
    Local<String> js_string = value.As<String>();
    Utf8Value utf8value(env->isolate(), js_string);
    (base.*member).assign(*utf8value, utf8value.length());
    if (empty_as_present || js_string->Length() != 0)
      base.flags |= flag;
  };

  copy_string_field(&url_data::username,
                    URL_FLAGS_HAS_USERNAME,
                    env->username_string(),
                    false);
  copy_string_field(&url_data::password,
                    URL_FLAGS_HAS_PASSWORD,
                    env->password_string(),
                    false);
  copy_string_field(&url_data::host,
                    URL_FLAGS_HAS_HOST,
                    env->host_string(),
                    true);
  copy_string_field(&url_data::query,
                    URL_FLAGS_HAS_QUERY,
                    env->query_string(),
                    true);
  copy_string_field(&url_data::fragment,
                    URL_FLAGS_HAS_FRAGMENT,
                    env->fragment_string(),
                    true);

  Local<Value> path = get_property(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
712
713
39788
// Builds a url_data from the JS object `context_obj`. Only a subset of the
// flags is copied (see kCopyFlagsMask), followed by scheme, port, username
// and password (the latter two only when the corresponding flag is set, in
// which case they must be strings) and host.
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;

  auto get_property = [&](Local<String> key) -> Local<Value> {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  auto assign_utf8 = [&](Local<Value> js_value, std::string* target) {
    Utf8Value value(env->isolate(), js_value);
    target->assign(*value, value.length());
  };

  Local<Value> flags = get_property(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = get_property(env->scheme_string());
  if (scheme->IsString())
    assign_utf8(scheme, &context.scheme);

  Local<Value> port = get_property(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = get_property(env->username_string());
    CHECK(username->IsString());
    assign_utf8(username, &context.username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = get_property(env->password_string());
    CHECK(password->IsString());
    assign_utf8(password, &context.password);
  }

  Local<Value> host = get_property(env->host_string());
  if (host->IsString())
    assign_utf8(host, &context.host);

  return context;
}
761
762
// Single dot segment can be ".", "%2e", or "%2E"
763
2289206
bool IsSingleDotSegment(const std::string& str) {
764
2289206
  switch (str.size()) {
765
4830
    case 1:
766
4830
      return str == ".";
767
133348
    case 3:
768
133348
      return str[0] == '%' &&
769

133394
             str[1] == '2' &&
770
133394
             ASCIILowercase(str[2]) == 'e';
771
2151028
    default:
772
2151028
      return false;
773
  }
774
}
775
776
// Double dot segment can be:
777
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
778
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
779
1148147
bool IsDoubleDotSegment(const std::string& str) {
780

1148147
  switch (str.size()) {
781
3863
    case 2:
782
3863
      return str == "..";
783
321415
    case 4:
784

321415
      if (str[0] != '.' && str[0] != '%')
785
321396
        return false;
786
19
      return ((str[0] == '.' &&
787
13
               str[1] == '%' &&
788

8
               str[2] == '2' &&
789
42
               ASCIILowercase(str[3]) == 'e') ||
790
15
              (str[0] == '%' &&
791

12
               str[1] == '2' &&
792
6
               ASCIILowercase(str[2]) == 'e' &&
793
25
               str[3] == '.'));
794
66013
    case 6:
795
66013
      return (str[0] == '%' &&
796

24
              str[1] == '2' &&
797
12
              ASCIILowercase(str[2]) == 'e' &&
798
4
              str[3] == '%' &&
799

66029
              str[4] == '2' &&
800
66017
              ASCIILowercase(str[5]) == 'e');
801
756856
    default:
802
756856
      return false;
803
  }
804
}
805
806
7760
void ShortenUrlPath(struct url_data* url) {
807
7760
  if (url->path.empty()) return;
808


7880
  if (url->path.size() == 1 && url->scheme == "file:" &&
809
442
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
810
7438
  url->path.pop_back();
811
}
812
813
}  // anonymous namespace
814
815
205560
// State-machine URL parser. Walks the bytes in [input, input + len) one
// character at a time and records the parsed components in |url|.
//
//   input, len      The text to parse.
//   state_override  State to start in. kUnknownState means no override, in
//                   which case parsing starts at kSchemeStart.
//   url             Output record; its flags and component fields are updated
//                   in place.
//   has_url         When false, leading and trailing C0 control or space
//                   characters are trimmed from the input first.
//   base, has_base  Base URL used when resolving relative input, and whether
//                   one was supplied.
//
// Problems are reported by setting URL_FLAGS_FAILED, URL_FLAGS_TERMINATED or
// URL_FLAGS_INVALID_PARSE_STATE in url->flags and returning early.
//
// Inside the main loop, `break` out of the state switch advances to the next
// character (the p++ at the bottom), while `continue` re-runs the loop on the
// same character, normally after switching to a different state.
void URL::Parse(const char* input,
816
                size_t len,
817
                enum url_parse_state state_override,
818
                struct url_data* url,
819
                bool has_url,
820
                const struct url_data* base,
821
                bool has_base) {
822
205560
  const char* p = input;
823
205560
  const char* end = input + len;
824
825
205560
  // Trim leading, then trailing, C0 control/space characters.
  if (!has_url) {
826
132132
    for (const char* ptr = p; ptr < end; ptr++) {
827
132113
      if (IsC0ControlOrSpace(*ptr))
828
56
        p++;
829
      else
830
132057
        break;
831
    }
832
132124
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
833
132105
      if (IsC0ControlOrSpace(*ptr))
834
48
        end--;
835
      else
836
132057
        break;
837
    }
838
132076
    input = p;
839
132076
    len = end - p;
840
  }
841
842
  // The spec says we should strip out any ASCII tabs or newlines.
843
  // In those cases, we create another std::string instance with the filtered
844
  // contents, but in the general case we avoid the overhead.
845
205560
  std::string whitespace_stripped;
846
13011323
  for (const char* ptr = p; ptr < end; ptr++) {
847
12805933
    if (!IsASCIITabOrNewline(*ptr))
848
12805763
      continue;
849
    // Hit tab or newline. Allocate storage, copy what we have until now,
850
    // and then iterate and filter all similar characters out.
851
170
    whitespace_stripped.reserve(len - 1);
852
170
    whitespace_stripped.assign(p, ptr - p);
853
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
854
1023
    for (ptr = ptr + 1; ptr < end; ptr++) {
855
853
      if (!IsASCIITabOrNewline(*ptr))
856
769
        whitespace_stripped += *ptr;
857
    }
858
859
    // Update variables like they should have looked like if the string
860
    // had been stripped of whitespace to begin with.
861
170
    input = whitespace_stripped.c_str();
862
170
    len = whitespace_stripped.size();
863
170
    p = input;
864
170
    end = input + len;
865
170
    break;
866
  }
867
868
205560
  bool atflag = false;  // Set when @ has been seen.
869
205560
  bool square_bracket_flag = false;  // Set inside of [...]
870
205560
  bool password_token_seen_flag = false;  // Set after a : after an username.
871
872
205560
  std::string buffer;
873
874
  // Set the initial parse state.
875
205560
  const bool has_state_override = state_override != kUnknownState;
876
205560
  enum url_parse_state state = has_state_override ? state_override :
877
                                                    kSchemeStart;
878
879

205560
  if (state < kSchemeStart || state > kFragment) {
880
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
881
    return;
882
  }
883
884
13470950
  // The loop runs one extra time with p == end so that every state gets to
  // see kEOL as the final "character".
  while (p <= end) {
885
13273833
    const char ch = p < end ? p[0] : kEOL;
886
13273833
    bool special = (url->flags & URL_FLAGS_SPECIAL);
887
    bool cannot_be_base;
888

13273833
    bool special_back_slash = (special && ch == '\\');
889
890





13273833
    switch (state) {
891
132153
      case kSchemeStart:
892
132153
        if (IsASCIIAlpha(ch)) {
893
121133
          buffer += ASCIILowercase(ch);
894
121133
          state = kScheme;
895
11020
        } else if (!has_state_override) {
896
11010
          state = kNoScheme;
897
11010
          continue;
898
        } else {
899
10
          url->flags |= URL_FLAGS_FAILED;
900
10
          return;
901
        }
902
121133
        break;
903
497090
      case kScheme:
904


497090
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
905
375957
          buffer += ASCIILowercase(ch);
906

121133
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
907

119900
          if (has_state_override && buffer.size() == 0) {
908
            url->flags |= URL_FLAGS_TERMINATED;
909
            return;
910
          }
911
119900
          buffer += ':';
912
913
119900
          bool new_is_special = IsSpecial(buffer);
914
915
119900
          if (has_state_override) {
916
39
            if ((special != new_is_special) ||
917
39
                ((buffer == "file:") &&
918
6
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
919
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
920


104
                  (url->port != -1))) ||
921
39
                  (url->scheme == "file:" && url->host.empty())) {
922
32
              url->flags |= URL_FLAGS_TERMINATED;
923
32
              return;
924
            }
925
          }
926
927
119868
          url->scheme = std::move(buffer);
928
119868
          url->port = NormalizePort(url->scheme, url->port);
929
119868
          if (new_is_special) {
930
115134
            url->flags |= URL_FLAGS_SPECIAL;
931
115134
            special = true;
932
          } else {
933
4734
            url->flags &= ~URL_FLAGS_SPECIAL;
934
4734
            special = false;
935
          }
936

119868
          special_back_slash = (special && ch == '\\');
937
119868
          buffer.clear();
938
119868
          if (has_state_override)
939
27
            return;
940
119841
          if (url->scheme == "file:") {
941
111897
            state = kFile;
942
3226
          } else if (special &&
943

11170
                     has_base &&
944
1027
                     url->scheme == base->scheme) {
945
317
            state = kSpecialRelativeOrAuthority;
946
7627
          } else if (special) {
947
2909
            state = kSpecialAuthoritySlashes;
948

4718
          } else if (p + 1 < end && p[1] == '/') {
949
714
            state = kPathOrAuthority;
950
714
            p++;
951
          } else {
952
4004
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
953
4004
            url->flags |= URL_FLAGS_HAS_PATH;
954
4004
            url->path.emplace_back("");
955
4004
            state = kCannotBeBase;
956
119841
          }
957
1233
        } else if (!has_state_override) {
958
1225
          buffer.clear();
959
1225
          state = kNoScheme;
960
1225
          // What was buffered is not a scheme: rewind to the start of the
          // input and re-run it through kNoScheme.
          p = input;
961
1225
          continue;
962
        } else {
963
8
          url->flags |= URL_FLAGS_FAILED;
964
8
          return;
965
        }
966
495798
        break;
967
12235
      case kNoScheme:
968

12235
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
969

12235
        if (!has_base || (cannot_be_base && ch != '#')) {
970
7471
          url->flags |= URL_FLAGS_FAILED;
971
7471
          return;
972

4764
        } else if (cannot_be_base && ch == '#') {
973
28
          url->scheme = base->scheme;
974
28
          if (IsSpecial(url->scheme)) {
975
            url->flags |= URL_FLAGS_SPECIAL;
976
            special = true;
977
          } else {
978
28
            url->flags &= ~URL_FLAGS_SPECIAL;
979
28
            special = false;
980
          }
981

28
          special_back_slash = (special && ch == '\\');
982
28
          if (base->flags & URL_FLAGS_HAS_PATH) {
983
28
            url->flags |= URL_FLAGS_HAS_PATH;
984
28
            url->path = base->path;
985
          }
986
28
          if (base->flags & URL_FLAGS_HAS_QUERY) {
987
4
            url->flags |= URL_FLAGS_HAS_QUERY;
988
4
            url->query = base->query;
989
          }
990
28
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
991
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
992
            url->fragment = base->fragment;
993
          }
994
28
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
995
28
          state = kFragment;
996

9472
        } else if (has_base &&
997
4736
                   base->scheme != "file:") {
998
323
          state = kRelative;
999
323
          continue;
1000
        } else {
1001
4413
          url->scheme = "file:";
1002
4413
          url->flags |= URL_FLAGS_SPECIAL;
1003
4413
          special = true;
1004
4413
          state = kFile;
1005

4413
          special_back_slash = (special && ch == '\\');
1006
4413
          continue;
1007
        }
1008
28
        break;
1009
317
      case kSpecialRelativeOrAuthority:
1010

317
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1011
285
          state = kSpecialAuthorityIgnoreSlashes;
1012
285
          p++;
1013
        } else {
1014
32
          state = kRelative;
1015
32
          continue;
1016
        }
1017
285
        break;
1018
714
      case kPathOrAuthority:
1019
714
        if (ch == '/') {
1020
546
          state = kAuthority;
1021
        } else {
1022
168
          state = kPath;
1023
168
          continue;
1024
        }
1025
546
        break;
1026
355
      case kRelative:
1027
355
        url->scheme = base->scheme;
1028
355
        if (IsSpecial(url->scheme)) {
1029
255
          url->flags |= URL_FLAGS_SPECIAL;
1030
255
          special = true;
1031
        } else {
1032
100
          url->flags &= ~URL_FLAGS_SPECIAL;
1033
100
          special = false;
1034
        }
1035

355
        special_back_slash = (special && ch == '\\');
1036

355
        switch (ch) {
1037
18
          case kEOL:
1038
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1039
4
              url->flags |= URL_FLAGS_HAS_USERNAME;
1040
4
              url->username = base->username;
1041
            }
1042
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1043
4
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1044
4
              url->password = base->password;
1045
            }
1046
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1047
16
              url->flags |= URL_FLAGS_HAS_HOST;
1048
16
              url->host = base->host;
1049
            }
1050
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1051
              url->flags |= URL_FLAGS_HAS_QUERY;
1052
              url->query = base->query;
1053
            }
1054
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1055
18
              url->flags |= URL_FLAGS_HAS_PATH;
1056
18
              url->path = base->path;
1057
            }
1058
18
            url->port = base->port;
1059
18
            break;
1060
76
          case '/':
1061
76
            state = kRelativeSlash;
1062
76
            break;
1063
38
          case '?':
1064
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1065
              url->flags |= URL_FLAGS_HAS_USERNAME;
1066
              url->username = base->username;
1067
            }
1068
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1069
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1070
              url->password = base->password;
1071
            }
1072
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1073
34
              url->flags |= URL_FLAGS_HAS_HOST;
1074
34
              url->host = base->host;
1075
            }
1076
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1077
38
              url->flags |= URL_FLAGS_HAS_PATH;
1078
38
              url->path = base->path;
1079
            }
1080
38
            url->port = base->port;
1081
38
            state = kQuery;
1082
38
            break;
1083
38
          case '#':
1084
38
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1085
              url->flags |= URL_FLAGS_HAS_USERNAME;
1086
              url->username = base->username;
1087
            }
1088
38
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1089
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1090
              url->password = base->password;
1091
            }
1092
38
            if (base->flags & URL_FLAGS_HAS_HOST) {
1093
34
              url->flags |= URL_FLAGS_HAS_HOST;
1094
34
              url->host = base->host;
1095
            }
1096
38
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1097
              url->flags |= URL_FLAGS_HAS_QUERY;
1098
              url->query = base->query;
1099
            }
1100
38
            if (base->flags & URL_FLAGS_HAS_PATH) {
1101
38
              url->flags |= URL_FLAGS_HAS_PATH;
1102
38
              url->path = base->path;
1103
            }
1104
38
            url->port = base->port;
1105
38
            state = kFragment;
1106
38
            break;
1107
185
          default:
1108
185
            if (special_back_slash) {
1109
18
              state = kRelativeSlash;
1110
            } else {
1111
167
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1112
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1113
1
                url->username = base->username;
1114
              }
1115
167
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1116
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1117
1
                url->password = base->password;
1118
              }
1119
167
              if (base->flags & URL_FLAGS_HAS_HOST) {
1120
147
                url->flags |= URL_FLAGS_HAS_HOST;
1121
147
                url->host = base->host;
1122
              }
1123
167
              if (base->flags & URL_FLAGS_HAS_PATH) {
1124
167
                url->flags |= URL_FLAGS_HAS_PATH;
1125
167
                url->path = base->path;
1126
167
                ShortenUrlPath(url);
1127
              }
1128
167
              url->port = base->port;
1129
167
              state = kPath;
1130
167
              continue;
1131
            }
1132
        }
1133
188
        break;
1134
94
      case kRelativeSlash:
1135


94
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1136
22
          state = kSpecialAuthorityIgnoreSlashes;
1137
72
        } else if (ch == '/') {
1138
6
          state = kAuthority;
1139
        } else {
1140
66
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1141
8
            url->flags |= URL_FLAGS_HAS_USERNAME;
1142
8
            url->username = base->username;
1143
          }
1144
66
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1145
4
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1146
4
            url->password = base->password;
1147
          }
1148
66
          if (base->flags & URL_FLAGS_HAS_HOST) {
1149
58
            url->flags |= URL_FLAGS_HAS_HOST;
1150
58
            url->host = base->host;
1151
          }
1152
66
          url->port = base->port;
1153
66
          state = kPath;
1154
66
          continue;
1155
        }
1156
28
        break;
1157
2909
      case kSpecialAuthoritySlashes:
1158
2909
        state = kSpecialAuthorityIgnoreSlashes;
1159

2909
        if (ch == '/' && p + 1 < end && p[1] == '/') {
1160
2760
          p++;
1161
        } else {
1162
149
          continue;
1163
        }
1164
2760
        break;
1165
3293
      case kSpecialAuthorityIgnoreSlashes:
1166

3293
        if (ch != '/' && ch != '\\') {
1167
3216
          state = kAuthority;
1168
3216
          continue;
1169
        }
1170
77
        break;
1171
84411
      case kAuthority:
1172
84411
        if (ch == '@') {
1173
563
          if (atflag) {
1174
41
            buffer.reserve(buffer.size() + 3);
1175
41
            buffer.insert(0, "%40");
1176
          }
1177
563
          atflag = true;
1178
563
          size_t blen = buffer.size();
1179

563
          if (blen > 0 && buffer[0] != ':') {
1180
467
            url->flags |= URL_FLAGS_HAS_USERNAME;
1181
          }
1182
6632
          for (size_t n = 0; n < blen; n++) {
1183
6069
            const char bch = buffer[n];
1184
6069
            if (bch == ':') {
1185
442
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1186
442
              if (!password_token_seen_flag) {
1187
426
                password_token_seen_flag = true;
1188
426
                continue;
1189
              }
1190
            }
1191
5643
            if (password_token_seen_flag) {
1192
2714
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1193
            } else {
1194
2929
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1195
            }
1196
          }
1197
563
          buffer.clear();
1198

83848
        } else if (ch == kEOL ||
1199
80150
                   ch == '/' ||
1200
80118
                   ch == '?' ||
1201
80100
                   ch == '#' ||
1202
                   special_back_slash) {
1203

3768
          if (atflag && buffer.size() == 0) {
1204
52
            url->flags |= URL_FLAGS_FAILED;
1205
52
            return;
1206
          }
1207
3716
          // Rewind past the buffered text; after the p++ at the bottom of the
          // loop, kHost starts on the first buffered character.
          p -= buffer.size() + 1;
1208
3716
          buffer.clear();
1209
3716
          state = kHost;
1210
        } else {
1211
80080
          buffer += ch;
1212
        }
1213
84359
        break;
1214
79244
      case kHost:
1215
      case kHostname:
1216

79244
        if (has_state_override && url->scheme == "file:") {
1217
12
          state = kFileHost;
1218
12
          continue;
1219

79232
        } else if (ch == ':' && !square_bracket_flag) {
1220
1020
          if (buffer.size() == 0) {
1221
24
            url->flags |= URL_FLAGS_FAILED;
1222
24
            return;
1223
          }
1224
996
          if (state_override == kHostname) {
1225
4
            return;
1226
          }
1227
992
          url->flags |= URL_FLAGS_HAS_HOST;
1228
992
          if (!ParseHost(buffer, &url->host, special)) {
1229
5
            url->flags |= URL_FLAGS_FAILED;
1230
5
            return;
1231
          }
1232
987
          buffer.clear();
1233
987
          state = kPort;
1234

78212
        } else if (ch == kEOL ||
1235
75232
                   ch == '/' ||
1236
75192
                   ch == '?' ||
1237
75166
                   ch == '#' ||
1238
                   special_back_slash) {
1239
3070
          // Step back one character so that, after the p++ at the bottom of
          // the loop, the next state sees this delimiter again.
          p--;
1240

3070
          if (special && buffer.size() == 0) {
1241
21
            url->flags |= URL_FLAGS_FAILED;
1242
21
            return;
1243
          }
1244
325
          if (has_state_override &&
1245

3412
              buffer.size() == 0 &&
1246
80
              ((url->username.size() > 0 || url->password.size() > 0) ||
1247
38
               url->port != -1)) {
1248
8
            url->flags |= URL_FLAGS_TERMINATED;
1249
8
            return;
1250
          }
1251
3041
          url->flags |= URL_FLAGS_HAS_HOST;
1252
3041
          if (!ParseHost(buffer, &url->host, special)) {
1253
432
            url->flags |= URL_FLAGS_FAILED;
1254
432
            return;
1255
          }
1256
2609
          buffer.clear();
1257
2609
          state = kPathStart;
1258
2609
          if (has_state_override) {
1259
221
            return;
1260
          }
1261
        } else {
1262
75142
          if (ch == '[')
1263
139
            square_bracket_flag = true;
1264
75142
          if (ch == ']')
1265
135
            square_bracket_flag = false;
1266
75142
          buffer += ch;
1267
        }
1268
78517
        break;
1269
5533
      case kPort:
1270
5533
        if (IsASCIIDigit(ch)) {
1271
4483
          buffer += ch;
1272

1050
        } else if (has_state_override ||
1273
547
                   ch == kEOL ||
1274
36
                   ch == '/' ||
1275
36
                   ch == '?' ||
1276
36
                   ch == '#' ||
1277
                   special_back_slash) {
1278
1014
          if (buffer.size() > 0) {
1279
1000
            unsigned port = 0;
1280
            // the condition port <= 0xffff prevents integer overflow
1281

5267
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1282
4267
              port = port * 10 + buffer[i] - '0';
1283
1000
            if (port > 0xffff) {
1284
              // TODO(TimothyGu): This hack is currently needed for the host
1285
              // setter since it needs access to hostname if it is valid, and
1286
              // if the FAILED flag is set the entire response to JS layer
1287
              // will be empty.
1288
26
              if (state_override == kHost)
1289
2
                url->port = -1;
1290
              else
1291
24
                url->flags |= URL_FLAGS_FAILED;
1292
26
              return;
1293
            }
1294
            // the port is valid
1295
974
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1296
974
            if (url->port == -1)
1297
47
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1298
974
            buffer.clear();
1299
14
          } else if (has_state_override) {
1300
            // TODO(TimothyGu): Similar case as above.
1301
6
            if (state_override == kHost)
1302
2
              url->port = -1;
1303
            else
1304
4
              url->flags |= URL_FLAGS_TERMINATED;
1305
6
            return;
1306
          }
1307
982
          state = kPathStart;
1308
982
          continue;
1309
        } else {
1310
36
          url->flags |= URL_FLAGS_FAILED;
1311
36
          return;
1312
        }
1313
4483
        break;
1314
116310
      case kFile:
1315
116310
        url->scheme = "file:";
1316
116310
        url->host.clear();
1317
116310
        url->flags |= URL_FLAGS_HAS_HOST;
1318

116310
        if (ch == '/' || ch == '\\') {
1319
112013
          state = kFileSlash;
1320

4297
        } else if (has_base && base->scheme == "file:") {
1321

4278
          switch (ch) {
1322
4
            case kEOL:
1323
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1324
4
                url->host = base->host;
1325
              }
1326
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1327
4
                url->flags |= URL_FLAGS_HAS_PATH;
1328
4
                url->path = base->path;
1329
              }
1330
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1331
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1332
4
                url->query = base->query;
1333
              }
1334
4
              break;
1335
4
            case '?':
1336
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1337
4
                url->host = base->host;
1338
              }
1339
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1340
4
                url->flags |= URL_FLAGS_HAS_PATH;
1341
4
                url->path = base->path;
1342
              }
1343
4
              url->flags |= URL_FLAGS_HAS_QUERY;
1344
4
              url->query.clear();
1345
4
              state = kQuery;
1346
4
              break;
1347
4
            case '#':
1348
4
              if (base->flags & URL_FLAGS_HAS_HOST) {
1349
4
                url->host = base->host;
1350
              }
1351
4
              if (base->flags & URL_FLAGS_HAS_PATH) {
1352
4
                url->flags |= URL_FLAGS_HAS_PATH;
1353
4
                url->path = base->path;
1354
              }
1355
4
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1356
4
                url->flags |= URL_FLAGS_HAS_QUERY;
1357
4
                url->query = base->query;
1358
              }
1359
4
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1360
4
              url->fragment.clear();
1361
4
              state = kFragment;
1362
4
              break;
1363
4266
            default:
1364
4266
              url->query.clear();
1365
4266
              if (base->flags & URL_FLAGS_HAS_HOST) {
1366
4266
                url->host = base->host;
1367
              }
1368
4266
              if (base->flags & URL_FLAGS_HAS_PATH) {
1369
4266
                url->flags |= URL_FLAGS_HAS_PATH;
1370
4266
                url->path = base->path;
1371
              }
1372
4266
              if (!StartsWithWindowsDriveLetter(p, end)) {
1373
4242
                ShortenUrlPath(url);
1374
              } else {
1375
24
                url->path.clear();
1376
              }
1377
4266
              state = kPath;
1378
4266
              continue;
1379
          }
1380
        } else {
1381
19
          state = kPath;
1382
19
          continue;
1383
        }
1384
112025
        break;
1385
112013
      case kFileSlash:
1386

112013
        if (ch == '/' || ch == '\\') {
1387
111873
          state = kFileHost;
1388
        } else {
1389

140
          if (has_base && base->scheme == "file:") {
1390
126
            url->flags |= URL_FLAGS_HAS_HOST;
1391
126
            url->host = base->host;
1392

238
            // NOTE(review): base->path[0] is read without checking that
            // base->path is non-empty -- confirm callers guarantee that.
            if (!StartsWithWindowsDriveLetter(p, end) &&
1393
112
                IsNormalizedWindowsDriveLetter(base->path[0])) {
1394
4
              url->flags |= URL_FLAGS_HAS_PATH;
1395
4
              url->path.push_back(base->path[0]);
1396
            }
1397
          }
1398
140
          state = kPath;
1399
140
          continue;
1400
        }
1401
111873
        break;
1402
112993
      case kFileHost:
1403

112993
        if (ch == kEOL ||
1404
1118
            ch == '/' ||
1405
1108
            ch == '\\' ||
1406
1108
            ch == '?' ||
1407
            ch == '#') {
1408
111873
          if (!has_state_override &&
1409

223758
              buffer.size() == 2 &&
1410
22
              IsWindowsDriveLetter(buffer)) {
1411
12
            state = kPath;
1412
111873
          } else if (buffer.size() == 0) {
1413
111679
            url->flags |= URL_FLAGS_HAS_HOST;
1414
111679
            url->host.clear();
1415
111679
            if (has_state_override)
1416
4
              return;
1417
111675
            state = kPathStart;
1418
          } else {
1419
194
            std::string host;
1420
194
            if (!ParseHost(buffer, &host, special)) {
1421
52
              url->flags |= URL_FLAGS_FAILED;
1422
52
              return;
1423
            }
1424
142
            if (host == "localhost")
1425
37
              host.clear();
1426
142
            url->flags |= URL_FLAGS_HAS_HOST;
1427
142
            url->host = host;
1428
142
            if (has_state_override)
1429
4
              return;
1430
138
            buffer.clear();
1431
138
            state = kPathStart;
1432
          }
1433
111825
          continue;
1434
        } else {
1435
1108
          buffer += ch;
1436
        }
1437
1108
        break;
1438
187920
      case kPathStart:
1439
187920
        if (IsSpecial(url->scheme)) {
1440
187374
          state = kPath;
1441

187374
          if (ch != '/' && ch != '\\') {
1442
73452
            continue;
1443
          }
1444

546
        } else if (!has_state_override && ch == '?') {
1445
6
          url->flags |= URL_FLAGS_HAS_QUERY;
1446
6
          url->query.clear();
1447
6
          state = kQuery;
1448

540
        } else if (!has_state_override && ch == '#') {
1449
6
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1450
6
          url->fragment.clear();
1451
6
          state = kFragment;
1452
534
        } else if (ch != kEOL) {
1453
457
          state = kPath;
1454
457
          if (ch != '/') {
1455
35
            continue;
1456
          }
1457

77
        } else if (has_state_override && !(url->flags & URL_FLAGS_HAS_HOST)) {
1458
2
          url->flags |= URL_FLAGS_HAS_PATH;
1459
2
          url->path.emplace_back("");
1460
        }
1461
114433
        break;
1462
11886959
      case kPath:
1463

11886959
        if (ch == kEOL ||
1464
10739403
            ch == '/' ||
1465
10739333
            special_back_slash ||
1466

10739333
            (!has_state_override && (ch == '?' || ch == '#'))) {
1467
1148147
          if (IsDoubleDotSegment(buffer)) {
1468
3351
            ShortenUrlPath(url);
1469

3351
            if (ch != '/' && !special_back_slash) {
1470
269
              url->flags |= URL_FLAGS_HAS_PATH;
1471
269
              url->path.emplace_back("");
1472
            }
1473
1146697
          } else if (IsSingleDotSegment(buffer) &&
1474

1146697
                     ch != '/' && !special_back_slash) {
1475
386
            url->flags |= URL_FLAGS_HAS_PATH;
1476
386
            url->path.emplace_back("");
1477
1144410
          } else if (!IsSingleDotSegment(buffer)) {
1478
2280811
            if (url->scheme == "file:" &&
1479
1289191
                url->path.empty() &&
1480

2432086
                buffer.size() == 2 &&
1481
100
                IsWindowsDriveLetter(buffer)) {
1482
98
              buffer[1] = ':';
1483
            }
1484
1142895
            url->flags |= URL_FLAGS_HAS_PATH;
1485
1142895
            url->path.emplace_back(std::move(buffer));
1486
          }
1487
1148147
          buffer.clear();
1488
2296294
          if (ch == '?') {
1489
480
            url->flags |= URL_FLAGS_HAS_QUERY;
1490
480
            url->query.clear();
1491
480
            state = kQuery;
1492
1147667
          } else if (ch == '#') {
1493
41
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1494
41
            url->fragment.clear();
1495
41
            state = kFragment;
1496
          }
1497
        } else {
1498
10738812
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1499
        }
1500
11886959
        break;
1501
29275
      case kCannotBeBase:
1502
29275
        switch (ch) {
1503
4
          case '?':
1504
4
            state = kQuery;
1505
4
            break;
1506
10
          case '#':
1507
10
            state = kFragment;
1508
10
            break;
1509
29261
          default:
1510
29261
            if (url->path.empty())
1511
              url->path.emplace_back("");
1512
29261
            else if (ch != kEOL)
1513
25271
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1514
        }
1515
29275
        break;
1516
5918
      case kQuery:
1517

5918
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1518
685
          url->flags |= URL_FLAGS_HAS_QUERY;
1519
685
          url->query = std::move(buffer);
1520
685
          buffer.clear();
1521
1060
          if (ch == '#')
1522
375
            state = kFragment;
1523
        } else {
1524
5233
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
1525
                                                QUERY_ENCODE_SET_NONSPECIAL);
1526
        }
1527
5918
        break;
1528
4097
      case kFragment:
1529
4097
        switch (ch) {
1530
570
          case kEOL:
1531
570
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1532
570
            url->fragment = std::move(buffer);
1533
570
            break;
1534
3527
          default:
1535
3527
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1536
        }
1537
4097
        break;
1538
      default:
1539
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1540
        return;
1541
    }
1542
1543
13053890
    p++;
1544
  }
1545
}  // NOLINT(readability/fn_size)
1546
1547
// https://url.spec.whatwg.org/#url-serializing
1548
33696
std::string URL::SerializeURL(const struct url_data* url,
1549
                              bool exclude = false) {
1550
33696
  std::string output = url->scheme;
1551
33696
  if (url->flags & URL_FLAGS_HAS_HOST) {
1552
33696
    output += "//";
1553
33696
    if (url->flags & URL_FLAGS_HAS_USERNAME ||
1554
33696
        url->flags & URL_FLAGS_HAS_PASSWORD) {
1555
      if (url->flags & URL_FLAGS_HAS_USERNAME) {
1556
        output += url->username;
1557
      }
1558
      if (url->flags & URL_FLAGS_HAS_PASSWORD) {
1559
        output += ":" + url->password;
1560
      }
1561
      output += "@";
1562
    }
1563
33696
    output += url->host;
1564
33696
    if (url->port != -1) {
1565
      output += ":" + std::to_string(url->port);
1566
    }
1567
  }
1568
33696
  if (url->flags & URL_FLAGS_CANNOT_BE_BASE) {
1569
    output += url->path[0];
1570
  } else {
1571
    if (!(url->flags & URL_FLAGS_HAS_HOST) &&
1572

33696
          url->path.size() > 1 &&
1573
          url->path[0].empty()) {
1574
      output += "/.";
1575
    }
1576
358899
    for (size_t i = 1; i < url->path.size(); i++) {
1577
325203
      output += "/" + url->path[i];
1578
    }
1579
  }
1580
33696
  if (url->flags & URL_FLAGS_HAS_QUERY) {
1581
    output = "?" + url->query;
1582
  }
1583

33696
  if (!exclude && url->flags & URL_FLAGS_HAS_FRAGMENT) {
1584
    output = "#" + url->fragment;
1585
  }
1586
33696
  return output;
1587
}
1588
1589
namespace {
1590
129964
void SetArgs(Environment* env,
1591
             Local<Value> argv[ARG_COUNT],
1592
             const struct url_data& url) {
1593
129964
  Isolate* isolate = env->isolate();
1594
129964
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1595
259928
  argv[ARG_PROTOCOL] =
1596
129964
      url.flags & URL_FLAGS_SPECIAL ?
1597
125005
          GetSpecial(env, url.scheme) :
1598
4959
          OneByteString(isolate, url.scheme.c_str());
1599
129964
  if (url.flags & URL_FLAGS_HAS_USERNAME)
1600
1220
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
1601
129964
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
1602
1180
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
1603
129964
  if (url.flags & URL_FLAGS_HAS_HOST)
1604
251398
    argv[ARG_HOST] = Utf8String(isolate, url.host);
1605
129964
  if (url.flags & URL_FLAGS_HAS_QUERY)
1606
1386
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
1607
129964
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
1608
1132
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
1609
129964
  if (url.port > -1)
1610
2192
    argv[ARG_PORT] = Integer::New(isolate, url.port);
1611
129964
  if (url.flags & URL_FLAGS_HAS_PATH)
1612
258784
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
1613
129964
}
1614
1615
138139
void Parse(Environment* env,
1616
           Local<Value> recv,
1617
           const char* input,
1618
           size_t len,
1619
           enum url_parse_state state_override,
1620
           Local<Value> base_obj,
1621
           Local<Value> context_obj,
1622
           Local<Function> cb,
1623
           Local<Value> error_cb) {
1624
138139
  Isolate* isolate = env->isolate();
1625
138139
  Local<Context> context = env->context();
1626
138139
  HandleScope handle_scope(isolate);
1627
138139
  Context::Scope context_scope(context);
1628
1629
138139
  const bool has_context = context_obj->IsObject();
1630
138139
  const bool has_base = base_obj->IsObject();
1631
1632
138139
  url_data base;
1633
138139
  url_data url;
1634
138139
  if (has_context)
1635
39788
    url = HarvestContext(env, context_obj.As<Object>());
1636
138139
  if (has_base)
1637
6424
    base = HarvestBase(env, base_obj.As<Object>());
1638
1639
138139
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1640

138139
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1641
39788
      ((state_override != kUnknownState) &&
1642
39788
       (url.flags & URL_FLAGS_TERMINATED)))
1643
44
    return;
1644
1645
  // Define the return value placeholders
1646
138095
  const Local<Value> undef = Undefined(isolate);
1647
138095
  const Local<Value> null = Null(isolate);
1648
138095
  if (!(url.flags & URL_FLAGS_FAILED)) {
1649
    Local<Value> argv[] = {
1650
      undef,
1651
      undef,
1652
      undef,
1653
      undef,
1654
      null,  // host defaults to null
1655
      null,  // port defaults to null
1656
      undef,
1657
      null,  // query defaults to null
1658
      null,  // fragment defaults to null
1659
129964
    };
1660
129964
    SetArgs(env, argv, url);
1661
259928
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
1662
8131
  } else if (error_cb->IsFunction()) {
1663
8001
    Local<Value> argv[2] = { undef, undef };
1664
8001
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1665
8001
    argv[ERR_ARG_INPUT] =
1666
16002
      String::NewFromUtf8(env->isolate(), input).ToLocalChecked();
1667
8001
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
1668
8001
        .FromMaybe(Local<Value>());
1669
  }
1670
}
1671
1672
138139
void Parse(const FunctionCallbackInfo<Value>& args) {
1673
138139
  Environment* env = Environment::GetCurrent(args);
1674
138139
  CHECK_GE(args.Length(), 5);
1675
276278
  CHECK(args[0]->IsString());  // input
1676


375126
  CHECK(args[2]->IsUndefined() ||  // base context
1677
        args[2]->IsNull() ||
1678
        args[2]->IsObject());
1679


395642
  CHECK(args[3]->IsUndefined() ||  // context
1680
        args[3]->IsNull() ||
1681
        args[3]->IsObject());
1682
138139
  CHECK(args[4]->IsFunction());  // complete callback
1683

374629
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1684
1685
138139
  Utf8Value input(env->isolate(), args[0]);
1686
138139
  enum url_parse_state state_override = kUnknownState;
1687
138139
  if (args[1]->IsNumber()) {
1688
138139
    state_override = static_cast<enum url_parse_state>(
1689
276278
        args[1]->Uint32Value(env->context()).FromJust());
1690
  }
1691
1692
276278
  Parse(env, args.This(),
1693
138139
        *input, input.length(),
1694
        state_override,
1695
        args[2],
1696
        args[3],
1697
276278
        args[4].As<Function>(),
1698
        args[5]);
1699
138139
}
1700
1701
92
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1702
92
  Environment* env = Environment::GetCurrent(args);
1703
92
  CHECK_GE(args.Length(), 1);
1704
184
  CHECK(args[0]->IsString());
1705
184
  Utf8Value value(env->isolate(), args[0]);
1706
92
  std::string output;
1707
92
  size_t len = value.length();
1708
92
  output.reserve(len);
1709
756
  for (size_t n = 0; n < len; n++) {
1710
664
    const char ch = (*value)[n];
1711
664
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
1712
  }
1713
276
  args.GetReturnValue().Set(
1714
184
      String::NewFromUtf8(env->isolate(), output.c_str()).ToLocalChecked());
1715
92
}
1716
1717
229
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
1718
229
  Environment* env = Environment::GetCurrent(args);
1719
229
  CHECK_GE(args.Length(), 1);
1720
458
  CHECK(args[0]->IsString());
1721
229
  Utf8Value value(env->isolate(), args[0]);
1722
1723
229
  URLHost host;
1724
  // Assuming the host is used for a special scheme.
1725
229
  host.ParseHost(*value, value.length(), true);
1726
229
  if (host.ParsingFailed()) {
1727
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1728
12
    return;
1729
  }
1730
217
  std::string out = host.ToStringMove();
1731
651
  args.GetReturnValue().Set(
1732
434
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1733
}
1734
1735
207
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
1736
207
  Environment* env = Environment::GetCurrent(args);
1737
207
  CHECK_GE(args.Length(), 1);
1738
414
  CHECK(args[0]->IsString());
1739
207
  Utf8Value value(env->isolate(), args[0]);
1740
1741
207
  URLHost host;
1742
  // Assuming the host is used for a special scheme.
1743
207
  host.ParseHost(*value, value.length(), true, true);
1744
207
  if (host.ParsingFailed()) {
1745
12
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
1746
12
    return;
1747
  }
1748
195
  std::string out = host.ToStringMove();
1749
585
  args.GetReturnValue().Set(
1750
390
      String::NewFromUtf8(env->isolate(), out.c_str()).ToLocalChecked());
1751
}
1752
1753
611
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
1754
611
  Environment* env = Environment::GetCurrent(args);
1755
611
  CHECK_EQ(args.Length(), 1);
1756
611
  CHECK(args[0]->IsFunction());
1757
1222
  env->set_url_constructor_function(args[0].As<Function>());
1758
611
}
1759
1760
611
void Initialize(Local<Object> target,
1761
                Local<Value> unused,
1762
                Local<Context> context,
1763
                void* priv) {
1764
611
  Environment* env = Environment::GetCurrent(context);
1765
611
  env->SetMethod(target, "parse", Parse);
1766
611
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
1767
611
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
1768
611
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
1769
611
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
1770
1771
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
1772
16497
  FLAGS(XX)
1773
#undef XX
1774
1775
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
1776
25662
  PARSESTATES(XX)
1777
#undef XX
1778
611
}
1779
}  // namespace
1780
1781
4794
// Registers every native callback exposed by this binding with `registry`.
// The registration order is kept fixed.
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
  const v8::FunctionCallback callbacks[] = {
    Parse,
    EncodeAuthSet,
    DomainToASCII,
    DomainToUnicode,
    SetURLConstructor,
  };
  for (const v8::FunctionCallback callback : callbacks) {
    registry->Register(callback);
  }
}
1788
1789
8
std::string URL::ToFilePath() const {
1790
8
  if (context_.scheme != "file:") {
1791
1
    return "";
1792
  }
1793
1794
#ifdef _WIN32
1795
  const char* slash = "\\";
1796
  auto is_slash = [] (char ch) {
1797
    return ch == '/' || ch == '\\';
1798
  };
1799
#else
1800
7
  const char* slash = "/";
1801
46
  auto is_slash = [] (char ch) {
1802
46
    return ch == '/';
1803
  };
1804

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1805
7
      context_.host.length() > 0) {
1806
1
    return "";
1807
  }
1808
#endif
1809
12
  std::string decoded_path;
1810
18
  for (const std::string& part : context_.path) {
1811
13
    std::string decoded = PercentDecode(part.c_str(), part.length());
1812
58
    for (char& ch : decoded) {
1813
46
      if (is_slash(ch)) {
1814
1
        return "";
1815
      }
1816
    }
1817
12
    decoded_path += slash + decoded;
1818
  }
1819
1820
#ifdef _WIN32
1821
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
1822
1823
  // If hostname is set, then we have a UNC path. Pass the hostname through
1824
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
1825
  // need to worry about percent encoding because the URL parser will have
1826
  // already taken care of that for us. Note that this only causes IDNs with an
1827
  // appropriate `xn--` prefix to be decoded.
1828
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
1829
      context_.host.length() > 0) {
1830
    std::string unicode_host;
1831
    if (!ToUnicode(context_.host, &unicode_host)) {
1832
      return "";
1833
    }
1834
    return "\\\\" + unicode_host + decoded_path;
1835
  }
1836
  // Otherwise, it's a local path that requires a drive letter.
1837
  if (decoded_path.length() < 3) {
1838
    return "";
1839
  }
1840
  if (decoded_path[2] != ':' ||
1841
      !IsASCIIAlpha(decoded_path[1])) {
1842
    return "";
1843
  }
1844
  // Strip out the leading '\'.
1845
  return decoded_path.substr(1);
1846
#else
1847
5
  return decoded_path;
1848
#endif
1849
}
1850
1851
33696
// Builds a "file://" URL from `file_path`. Every '%' in the path is written
// as "%25" before the text is handed to the parser, which is started in the
// kPathStart state on top of the already-parsed "file://" URL.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped_file_path;
  for (const char ch : file_path) {
    escaped_file_path += ch;
    if (ch == '%') {
      // Together with the '%' just appended this forms "%25".
      escaped_file_path += "25";
    }
  }

  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
1863
1864
// This function works by calling out to a JS function that creates and
1865
// returns the JS URL object. Be mindful of the JS<->Native boundary
1866
// crossing that is required.
1867
MaybeLocal<Value> URL::ToObject(Environment* env) const {
1868
  Isolate* isolate = env->isolate();
1869
  Local<Context> context = env->context();
1870
  Context::Scope context_scope(context);
1871
1872
  const Local<Value> undef = Undefined(isolate);
1873
  const Local<Value> null = Null(isolate);
1874
1875
  if (context_.flags & URL_FLAGS_FAILED)
1876
    return Local<Value>();
1877
1878
  Local<Value> argv[] = {
1879
    undef,
1880
    undef,
1881
    undef,
1882
    undef,
1883
    null,  // host defaults to null
1884
    null,  // port defaults to null
1885
    undef,
1886
    null,  // query defaults to null
1887
    null,  // fragment defaults to null
1888
  };
1889
  SetArgs(env, argv, context_);
1890
1891
  MaybeLocal<Value> ret;
1892
  {
1893
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
1894
1895
    // The SetURLConstructor method must have been called already to
1896
    // set the constructor function used below. SetURLConstructor is
1897
    // called automatically when the internal/url.js module is loaded
1898
    // during the internal/bootstrap/node.js processing.
1899
    ret = env->url_constructor_function()
1900
        ->Call(env->context(), undef, arraysize(argv), argv);
1901
  }
1902
1903
  return ret;
1904
}
1905
1906
}  // namespace url
1907
}  // namespace node
1908
1909
4863
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)
1910
4794
NODE_MODULE_EXTERNAL_REFERENCE(url, node::url::RegisterExternalReferences)