GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage/nodes/benchmark/out/../src/node_url.cc Lines: 1138 1162 97.9 %
Date: 2017-12-18 Branches: 1064 1196 89.0 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "node_internals.h"
3
#include "base_object-inl.h"
4
#include "node_i18n.h"
5
6
#include <string>
7
#include <vector>
8
#include <stdio.h>
9
#include <cmath>
10
11
namespace node {
12
13
using v8::Array;
14
using v8::Context;
15
using v8::Function;
16
using v8::FunctionCallbackInfo;
17
using v8::HandleScope;
18
using v8::Integer;
19
using v8::Isolate;
20
using v8::Local;
21
using v8::MaybeLocal;
22
using v8::Null;
23
using v8::Object;
24
using v8::String;
25
using v8::TryCatch;
26
using v8::Undefined;
27
using v8::Value;
28
29
// Looks up the property called `name` on `obj` using `env`'s context and
// unwraps the result with ToLocalChecked().
// NOTE(review): OneByteString() is declared elsewhere; presumably it builds a
// V8 string from a C string -- confirm.
// Every parameter is parenthesized so that arbitrary expressions can be
// passed as arguments without changing how the expansion parses.
#define GET(env, obj, name)                                                   \
  (obj)->Get((env)->context(),                                                \
             OneByteString((env)->isolate(), (name))).ToLocalChecked()
32
33
// Reads the property named after the token `name` from `obj`. When the value
// is a string, assigns it to `data->name` and ORs `flag` into `data->flags`;
// any other value leaves `data` untouched.
// The expansion is a bare braced block, so call sites work with or without a
// trailing semicolon. `name` cannot be parenthesized because it is used both
// stringified and as a member name; every other parameter is parenthesized so
// that expression arguments expand safely.
#define GET_AND_SET(env, obj, name, data, flag)                               \
  {                                                                           \
    Local<Value> val = GET((env), (obj), #name);                              \
    if (val->IsString()) {                                                    \
      Utf8Value value((env)->isolate(), val.As<String>());                    \
      (data)->name = *value;                                                  \
      (data)->flags |= (flag);                                                \
    }                                                                         \
  }
42
43
// Builds a V8 string from `str` (anything exposing c_str()) via
// String::NewFromUtf8() and unwraps it with ToLocalChecked().
// Both parameters are parenthesized so that expression arguments, e.g. a
// concatenation, bind to .c_str() as a whole.
#define UTF8STRING(isolate, str)                                              \
  String::NewFromUtf8((isolate), (str).c_str(), v8::NewStringType::kNormal)   \
    .ToLocalChecked()
46
47
namespace url {
48
49
namespace {
50
51
// Sentinel standing in for the "EOF code point" of the URL specification:
// https://url.spec.whatwg.org/#eof-code-point
constexpr char kEOL = -1;

// U+FFFD REPLACEMENT CHARACTER. Used in ToUSVString().
constexpr char16_t kUnicodeReplacementCharacter = 0xFFFD;
56
57
// https://url.spec.whatwg.org/#concept-host
//
// Tagged union describing a parsed host. `type_` records which member of
// `value_` is alive; it starts out as H_FAILED. Because the union holds
// std::string members, objects of this class are not copyable.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while the host type is H_FAILED, which is also the initial state.
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;

 private:
  // Discriminator for `value_`.
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  union Value {
    std::string domain;
    uint32_t ipv4;
    uint16_t ipv6[8];
    std::string opaque;

    // Deliberately empty: the owning URLHost destroys whichever string
    // member is alive, based on `type_`.
    ~Value() {}
    // Starts with the trivially destructible `ipv4` member active.
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Assigning to the string members of the union with = is fragile: it
  // only works if the member already is in a state that needs no cleanup
  // of previous contents. That used to hold by accident, since
  // ParseIPv6Host() zero-fills `value_`, but doing so leans on standard
  // library internals far too much.
  // The helpers below construct the string in place instead. Not forcing
  // strings into a union at all may be the better long-term answer.
  void SetOpaque(std::string&& opaque) {
    type_ = HostType::H_OPAQUE;
    new(&value_.opaque) std::string(std::move(opaque));
  }

  void SetDomain(std::string&& domain) {
    type_ = HostType::H_DOMAIN;
    new(&value_.domain) std::string(std::move(domain));
  }
};
113
114
4042
// Destroys the string member of `value_` that `type_` marks as alive. The
// remaining host types store plain integers and need no cleanup.
URLHost::~URLHost() {
  // Alias needed to spell the explicit destructor call on std::string.
  using StdString = std::string;
  if (type_ == HostType::H_DOMAIN) {
    value_.domain.~StdString();
  } else if (type_ == HostType::H_OPAQUE) {
    value_.opaque.~StdString();
  }
}
122
123
// X-macro listing the enumerators of url_cb_args in positional order.
// NOTE(review): presumably these index the arguments handed to the parse
// callback -- confirm against the call sites.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)

// X-macro listing the enumerators of url_error_cb_args in positional order.
// The final entry carries no line continuation, so a line added directly
// below cannot be absorbed into the macro by accident.
#define ERR_ARGS(XX)                                                          \
  XX(ERR_ARG_FLAGS)                                                           \
  XX(ERR_ARG_INPUT)

// ARG_FLAGS == 0 ... ARG_FRAGMENT == 8.
enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};

// ERR_ARG_FLAGS == 0, ERR_ARG_INPUT == 1.
enum url_error_cb_args {
#define XX(name) name,
  ERR_ARGS(XX)
#undef XX
};
149
150
// Character classification helpers. Each predicate is a template over the
// character type and contains a static_assert that rejects types smaller
// than the code unit width it is meant to inspect (8 or 16 bits).

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
template <typename T>
inline bool IsASCIITabOrNewline(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return ch == '\t' || ch == '\n' || ch == '\r';
}

// https://infra.spec.whatwg.org/#c0-control-or-space
template <typename T>
inline bool IsC0ControlOrSpace(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return ch >= '\0' && ch <= ' ';
}

// https://infra.spec.whatwg.org/#ascii-digit
template <typename T>
inline bool IsASCIIDigit(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return ch >= '0' && ch <= '9';
}

// https://infra.spec.whatwg.org/#ascii-hex-digit
template <typename T>
inline bool IsASCIIHexDigit(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return IsASCIIDigit(ch) ||
         (ch >= 'A' && ch <= 'F') ||
         (ch >= 'a' && ch <= 'f');
}

// https://infra.spec.whatwg.org/#ascii-alpha
template <typename T>
inline bool IsASCIIAlpha(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
}

// https://infra.spec.whatwg.org/#ascii-alphanumeric
template <typename T>
inline bool IsASCIIAlphanumeric(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return IsASCIIDigit(ch) || IsASCIIAlpha(ch);
}

// https://infra.spec.whatwg.org/#ascii-lowercase
// Letters get bit 0x20 set, which maps 'A'..'Z' onto 'a'..'z'; every other
// value is returned unchanged.
template <typename T>
inline T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch))
    return ch;
  return ch | 0x20;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
template <typename T>
inline bool IsForbiddenHostCodePoint(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
         ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
         ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
         ch == '\\' || ch == ']';
}

// https://url.spec.whatwg.org/#windows-drive-letter
template <typename T>
inline bool IsWindowsDriveLetter(const T ch1, const T ch2) {
  static_assert(sizeof(ch1) >= 1, "Character must be wider than 8 bits");
  return IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|');
}

// String overload: tests the first two characters; shorter strings fail.
template <typename T>
inline bool IsWindowsDriveLetter(const std::basic_string<T>& str) {
  static_assert(sizeof(str[0]) >= 1, "Character must be wider than 8 bits");
  return str.length() >= 2 && IsWindowsDriveLetter(str[0], str[1]);
}

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
template <typename T>
inline bool IsNormalizedWindowsDriveLetter(const T ch1, const T ch2) {
  static_assert(sizeof(ch1) >= 1, "Character must be wider than 8 bits");
  return IsASCIIAlpha(ch1) && ch2 == ':';
}

// String overload: tests the first two characters; shorter strings fail.
template <typename T>
inline bool IsNormalizedWindowsDriveLetter(const std::basic_string<T>& str) {
  static_assert(sizeof(str[0]) >= 1, "Character must be wider than 8 bits");
  return str.length() >= 2 && IsNormalizedWindowsDriveLetter(str[0], str[1]);
}

// If a UTF-16 character is a low/trailing surrogate.
template <typename T>
inline bool IsUnicodeTrail(const T ch) {
  static_assert(sizeof(ch) >= 2, "Character must be wider than 16 bits");
  return (ch & 0xFC00) == 0xDC00;
}

// If a UTF-16 character is a surrogate.
template <typename T>
inline bool IsUnicodeSurrogate(const T ch) {
  static_assert(sizeof(ch) >= 2, "Character must be wider than 16 bits");
  return (ch & 0xF800) == 0xD800;
}

// If a UTF-16 surrogate is a low/trailing one. Only bit 0x400 is inspected,
// so the argument has to be a surrogate already.
template <typename T>
inline bool IsUnicodeSurrogateTrail(const T ch) {
  static_assert(sizeof(ch) >= 2, "Character must be wider than 16 bits");
  return (ch & 0x400) != 0;
}
225
226
// Percent-encoded form ("%XX", upper-case hex digits) of every byte value,
// indexed by the byte itself. Both the strings and the pointers stored in
// the array are const, so the table cannot be modified after initialization.
const char* const hex[256] = {
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
260
261
// Bitmap with one bit per byte value: the bit for byte b is bit (b & 7) of
// element [b >> 3]. Set bits: 0x00-0x1F and 0x7F-0xFF.
const uint8_t C0_CONTROL_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x00,  // 20-27
  0x00,  // 28-2F
  0x00,  // 30-37
  0x00,  // 38-3F
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x00,  // 60-67
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
327
328
// Bitmap with one bit per byte value: the bit for byte b is bit (b & 7) of
// element [b >> 3]. Set bits: 0x00-0x1F, 0x7F-0xFF, and the printable
// characters named next to each row.
const uint8_t FRAGMENT_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x05,  // 20-27: space, double quote
  0x00,  // 28-2F
  0x00,  // 30-37
  0x50,  // 38-3F: '<', '>'
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x01,  // 60-67: backtick
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
394
395
396
// Bitmap with one bit per byte value: the bit for byte b is bit (b & 7) of
// element [b >> 3]. Set bits: 0x00-0x1F, 0x7F-0xFF, and the printable
// characters named next to each row.
const uint8_t PATH_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x0D,  // 20-27: space, double quote, '#'
  0x00,  // 28-2F
  0x00,  // 30-37
  0xD0,  // 38-3F: '<', '>', '?'
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x01,  // 60-67: backtick
  0x00,  // 68-6F
  0x00,  // 70-77
  0xA8,  // 78-7F: '{', '}', 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
462
463
// Bitmap with one bit per byte value: the bit for byte b is bit (b & 7) of
// element [b >> 3]. Set bits: 0x00-0x1F, 0x7F-0xFF, and the printable
// characters named next to each row.
const uint8_t USERINFO_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x0D,  // 20-27: space, double quote, '#'
  0x80,  // 28-2F: '/'
  0x00,  // 30-37
  0xFC,  // 38-3F: ':', ';', '<', '=', '>', '?'
  0x01,  // 40-47: '@'
  0x00,  // 48-4F
  0x00,  // 50-57
  0x78,  // 58-5F: '[', backslash, ']', '^'
  0x01,  // 60-67: backtick
  0x00,  // 68-6F
  0x00,  // 70-77
  0xB8,  // 78-7F: '{', '|', '}', 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
529
530
// Bitmap with one bit per byte value: the bit for byte b is bit (b & 7) of
// element [b >> 3]. Set bits: 0x00-0x1F, 0x7F-0xFF, and the printable
// characters named next to each row.
const uint8_t QUERY_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x0D,  // 20-27: space, double quote, '#'
  0x00,  // 28-2F
  0x00,  // 30-37
  0x50,  // 38-3F: '<', '>'
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x00,  // 60-67
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
596
597
63793
// Tests bit `i` of the bitmap `a`, where bit numbering starts at the least
// significant bit of a[0]. `a` must hold at least (i / 8) + 1 bytes.
inline bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t containing_byte = a[i / 8];
  const unsigned mask = 1u << (i % 8);
  return (containing_byte & mask) != 0;
}
600
601
// Appends ch to str. If ch position in encode_set is set, the ch will
602
// be percent-encoded then appended.
603
63793
inline void AppendOrEscape(std::string* str,
604
                           const unsigned char ch,
605
                           const uint8_t encode_set[]) {
606
63793
  if (BitAt(encode_set, ch))
607
488
    *str += hex[ch];
608
  else
609
63305
    *str += ch;
610
63793
}
611
612
// Converts one ASCII hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its numeric
// value. Any other character yields static_cast<unsigned>(-1).
template <typename T>
inline unsigned hex2bin(const T ch) {
  unsigned digit = static_cast<unsigned>(-1);
  if ('0' <= ch && ch <= '9')
    digit = ch - '0';
  else if ('A' <= ch && ch <= 'F')
    digit = 10 + (ch - 'A');
  else if ('a' <= ch && ch <= 'f')
    digit = 10 + (ch - 'a');
  return digit;
}
622
623
4538
inline std::string PercentDecode(const char* input, size_t len) {
624
4538
  std::string dest;
625
4538
  if (len == 0)
626
2
    return dest;
627
4536
  dest.reserve(len);
628
4536
  const char* pointer = input;
629
4536
  const char* end = input + len;
630
631
63904
  while (pointer < end) {
632
54832
    const char ch = pointer[0];
633
54832
    const size_t remaining = end - pointer - 1;
634


109411
    if (ch != '%' || remaining < 2 ||
635
259
        (ch == '%' &&
636
516
         (!IsASCIIHexDigit(pointer[1]) ||
637
257
          !IsASCIIHexDigit(pointer[2])))) {
638
54579
      dest += ch;
639
54579
      pointer++;
640
54579
      continue;
641
    } else {
642
253
      unsigned a = hex2bin(pointer[1]);
643
253
      unsigned b = hex2bin(pointer[2]);
644
253
      char c = static_cast<char>(a * 16 + b);
645
253
      dest += c;
646
253
      pointer += 3;
647
    }
648
  }
649
4536
  return dest;
650
}
651
652
// X-macro table of the special schemes. Each entry is
// XX(scheme including the trailing ':', port number); "file:" is listed
// with -1.
#define SPECIALS(XX)  \
  XX("ftp:", 21)      \
  XX("file:", -1)     \
  XX("gopher:", 70)   \
  XX("http:", 80)     \
  XX("https:", 443)   \
  XX("ws:", 80)       \
  XX("wss:", 443)
660
661
4755
inline bool IsSpecial(std::string scheme) {
662
#define XX(name, _) if (scheme == name) return true;
663



4755
  SPECIALS(XX);
664
#undef XX
665
984
  return false;
666
}
667
668
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
669
420
inline bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
670
420
  const size_t length = end - p;
671
417
  return length >= 2 &&
672

449
    IsWindowsDriveLetter(p[0], p[1]) &&
673
14
    (length == 2 ||
674
21
      p[2] == '/' ||
675
10
      p[2] == '\\' ||
676
5
      p[2] == '?' ||
677
422
      p[2] == '#');
678
}
679
680
2898
inline int NormalizePort(std::string scheme, int p) {
681
#define XX(name, port) if (scheme == name && p == port) return -1;
682










2898
  SPECIALS(XX);
683
#undef XX
684
2409
  return p;
685
}
686
687
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs `input` through i18n::ToUnicode and stores the result in *output.
// Returns false (leaving *output untouched) when the conversion reports a
// negative status.
inline bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const auto status =
      i18n::ToUnicode(&converted, input.c_str(), input.length());
  if (status < 0)
    return false;
  output->assign(*converted, converted.length());
  return true;
}

// Runs `input` through i18n::ToASCII and stores the result in *output.
// Returns false (leaving *output untouched) when the conversion reports a
// negative status.
inline bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const auto status =
      i18n::ToASCII(&converted, input.c_str(), input.length());
  if (status < 0)
    return false;
  output->assign(*converted, converted.length());
  return true;
}
#else
// Without ICU these are intentional pass-throughs: the input is copied to
// *output as-is and the call always succeeds.
inline bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}

inline bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
#endif
715
716
66
void URLHost::ParseIPv6Host(const char* input, size_t length) {
717
66
  CHECK_EQ(type_, HostType::H_FAILED);
718
594
  for (unsigned n = 0; n < 8; n++)
719
528
    value_.ipv6[n] = 0;
720
66
  uint16_t* piece_pointer = &value_.ipv6[0];
721
66
  uint16_t* const buffer_end = piece_pointer + 8;
722
66
  uint16_t* compress_pointer = nullptr;
723
66
  const char* pointer = input;
724
66
  const char* end = pointer + length;
725
  unsigned value, len, swaps, numbers_seen;
726
66
  char ch = pointer < end ? pointer[0] : kEOL;
727
66
  if (ch == ':') {
728

24
    if (length < 2 || pointer[1] != ':')
729
2
      return;
730
22
    pointer += 2;
731
22
    ch = pointer < end ? pointer[0] : kEOL;
732
22
    piece_pointer++;
733
22
    compress_pointer = piece_pointer;
734
  }
735
257
  while (ch != kEOL) {
736
171
    if (piece_pointer >= buffer_end)
737
2
      return;
738
169
    if (ch == ':') {
739
14
      if (compress_pointer != nullptr)
740
2
        return;
741
12
      pointer++;
742
12
      ch = pointer < end ? pointer[0] : kEOL;
743
12
      piece_pointer++;
744
12
      compress_pointer = piece_pointer;
745
12
      continue;
746
    }
747
155
    value = 0;
748
155
    len = 0;
749

481
    while (len < 4 && IsASCIIHexDigit(ch)) {
750
171
      value = value * 0x10 + hex2bin(ch);
751
171
      pointer++;
752
171
      ch = pointer < end ? pointer[0] : kEOL;
753
171
      len++;
754
    }
755

155
    switch (ch) {
756
      case '.':
757
32
        if (len == 0)
758
2
          return;
759
30
        pointer -= len;
760
30
        ch = pointer < end ? pointer[0] : kEOL;
761
30
        if (piece_pointer > buffer_end - 2)
762
2
          return;
763
28
        numbers_seen = 0;
764
130
        while (ch != kEOL) {
765
96
          value = 0xffffffff;
766
96
          if (numbers_seen > 0) {
767

68
            if (ch == '.' && numbers_seen < 4) {
768
62
              pointer++;
769
62
              ch = pointer < end ? pointer[0] : kEOL;
770
            } else {
771
6
              return;
772
            }
773
          }
774
90
          if (!IsASCIIDigit(ch))
775
12
            return;
776
252
          while (IsASCIIDigit(ch)) {
777
100
            unsigned number = ch - '0';
778
100
            if (value == 0xffffffff) {
779
78
              value = number;
780
22
            } else if (value == 0) {
781
2
              return;
782
            } else {
783
20
              value = value * 10 + number;
784
            }
785
98
            if (value > 255)
786
2
              return;
787
96
            pointer++;
788
96
            ch = pointer < end ? pointer[0] : kEOL;
789
          }
790
74
          *piece_pointer = *piece_pointer * 0x100 + value;
791
74
          numbers_seen++;
792

74
          if (numbers_seen == 2 || numbers_seen == 4)
793
30
            piece_pointer++;
794
        }
795
6
        if (numbers_seen != 4)
796
2
          return;
797
4
        continue;
798
      case ':':
799
101
        pointer++;
800
101
        ch = pointer < end ? pointer[0] : kEOL;
801
101
        if (ch == kEOL)
802
2
          return;
803
99
        break;
804
      case kEOL:
805
14
        break;
806
      default:
807
8
        return;
808
    }
809
113
    *piece_pointer = value;
810
113
    piece_pointer++;
811
  }
812
813
22
  if (compress_pointer != nullptr) {
814
14
    swaps = piece_pointer - compress_pointer;
815
14
    piece_pointer = buffer_end - 1;
816

44
    while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
817
16
      uint16_t temp = *piece_pointer;
818
16
      uint16_t* swap_piece = compress_pointer + swaps - 1;
819
16
      *piece_pointer = *swap_piece;
820
16
      *swap_piece = temp;
821
16
       piece_pointer--;
822
16
       swaps--;
823
    }
824

8
  } else if (compress_pointer == nullptr &&
825
             piece_pointer != buffer_end) {
826
2
    return;
827
  }
828
20
  type_ = HostType::H_IPV6;
829
}
830
831
1810
inline int64_t ParseNumber(const char* start, const char* end) {
832
1810
  unsigned R = 10;
833

1810
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
834
24
    start += 2;
835
24
    R = 16;
836
  }
837
1810
  if (end - start == 0) {
838
4
    return 0;
839

1806
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
840
32
    start++;
841
32
    R = 8;
842
  }
843
1806
  const char* p = start;
844
845
4208
  while (p < end) {
846
2257
    const char ch = p[0];
847

2257
    switch (R) {
848
      case 8:
849

173
        if (ch < '0' || ch > '7')
850
19
          return -1;
851
154
        break;
852
      case 10:
853
1980
        if (!IsASCIIDigit(ch))
854
1640
          return -1;
855
340
        break;
856
      case 16:
857
104
        if (!IsASCIIHexDigit(ch))
858
2
          return -1;
859
102
        break;
860
    }
861
596
    p++;
862
  }
863
145
  return strtoll(start, nullptr, R);
864
}
865
866
1720
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
867
1720
  CHECK_EQ(type_, HostType::H_FAILED);
868
1720
  *is_ipv4 = false;
869
1720
  const char* pointer = input;
870
1720
  const char* mark = input;
871
1720
  const char* end = pointer + length;
872
1720
  int parts = 0;
873
1720
  uint32_t val = 0;
874
  uint64_t numbers[4];
875
1720
  int tooBigNumbers = 0;
876
1720
  if (length == 0)
877
1683
    return;
878
879
19661
  while (pointer <= end) {
880
17892
    const char ch = pointer < end ? pointer[0] : kEOL;
881
17892
    const int remaining = end - pointer - 1;
882

17892
    if (ch == '.' || ch == kEOL) {
883
1818
      if (++parts > 4)
884
2
        return;
885
1816
      if (pointer == mark)
886
6
        return;
887
1810
      int64_t n = ParseNumber(mark, pointer);
888
1810
      if (n < 0)
889
1661
        return;
890
891
149
      if (n > 255) {
892
56
        tooBigNumbers++;
893
      }
894
149
      numbers[parts - 1] = n;
895
149
      mark = pointer + 1;
896

149
      if (ch == '.' && remaining == 0)
897
2
        break;
898
    }
899
16221
    pointer++;
900
  }
901
51
  CHECK_GT(parts, 0);
902
51
  *is_ipv4 = true;
903
904
  // If any but the last item in numbers is greater than 255, return failure.
905
  // If the last item in numbers is greater than or equal to
906
  // 256^(5 - the number of items in numbers), return failure.
907

100
  if (tooBigNumbers > 1 ||
908

130
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
909
47
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
910
14
    return;
911
  }
912
913
37
  type_ = HostType::H_IPV4;
914
37
  val = numbers[parts - 1];
915
86
  for (int n = 0; n < parts - 1; n++) {
916
49
    double b = 3 - n;
917
49
    val += numbers[n] * pow(256, b);
918
  }
919
920
37
  value_.ipv4 = val;
921
}
922
923
136
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
924
136
  CHECK_EQ(type_, HostType::H_FAILED);
925
136
  std::string output;
926
136
  output.reserve(length * 3);
927
879
  for (size_t i = 0; i < length; i++) {
928
759
    const char ch = input[i];
929

759
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
930
152
      return;
931
    } else {
932
743
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
933
    }
934
  }
935
936
120
  SetOpaque(std::move(output));
937
}
938
939
2021
void URLHost::ParseHost(const char* input,
940
                        size_t length,
941
                        bool is_special,
942
                        bool unicode) {
943
2021
  CHECK_EQ(type_, HostType::H_FAILED);
944
2021
  const char* pointer = input;
945
946
2021
  if (length == 0)
947
352
    return;
948
949
2021
  if (pointer[0] == '[') {
950
70
    if (pointer[length - 1] != ']')
951
4
      return;
952
66
    return ParseIPv6Host(++pointer, length - 2);
953
  }
954
955
1951
  if (!is_special)
956
136
    return ParseOpaqueHost(input, length);
957
958
  // First, we have to percent decode
959
1815
  std::string decoded = PercentDecode(input, length);
960
961
  // Then we have to punycode toASCII
962
1815
  if (!ToASCII(decoded, &decoded))
963
61
    return;
964
965
  // If any of the following characters are still present, we have to fail
966
30957
  for (size_t n = 0; n < decoded.size(); n++) {
967
29237
    const char ch = decoded[n];
968
29237
    if (IsForbiddenHostCodePoint(ch)) {
969
34
      return;
970
    }
971
  }
972
973
  // Check to see if it's an IPv4 IP address
974
  bool is_ipv4;
975
1720
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
976
1720
  if (is_ipv4)
977
51
    return;
978
979
  // If the unicode flag is set, run the result through punycode ToUnicode
980

1669
  if (unicode && !ToUnicode(decoded, &decoded))
981
    return;
982
983
  // It's not an IPv4 or IPv6 address, it must be a domain
984
1669
  SetDomain(std::move(decoded));
985
}
986
987
// Locates the longest run of zero elements in values[0..len) so that an IPv6
// address can be serialized with "::" compression. Returns a pointer to the
// first element of that run, or nullptr when no run is at least two elements
// long. Among runs of equal length the earliest one wins.
template <typename T>
inline T* FindLongestZeroSequence(T* values, size_t len) {
  T* best = nullptr;
  unsigned best_len = 1;  // A run must be strictly longer than this to count.
  T* run_start = nullptr;
  unsigned run_len = 0;

  for (size_t i = 0; i < len; i++) {
    if (values[i] == 0) {
      if (run_len == 0)
        run_start = &values[i];
      run_len++;
      continue;
    }
    // A non-zero element closes the current run, if any.
    if (run_len > best_len) {
      best_len = run_len;
      best = run_start;
    }
    run_len = 0;
  }

  // Account for a run that extends to the end of the array.
  if (run_len > best_len)
    best = run_start;
  return best;
}
1017
1018
1846
std::string URLHost::ToString() const {
1019
1846
  std::string dest;
1020

1846
  switch (type_) {
1021
    case HostType::H_DOMAIN:
1022
1669
      return value_.domain;
1023
      break;
1024
    case HostType::H_OPAQUE:
1025
120
      return value_.opaque;
1026
      break;
1027
    case HostType::H_IPV4: {
1028
37
      dest.reserve(15);
1029
37
      uint32_t value = value_.ipv4;
1030
185
      for (int n = 0; n < 4; n++) {
1031
        char buf[4];
1032
148
        snprintf(buf, sizeof(buf), "%d", value % 256);
1033
148
        dest.insert(0, buf);
1034
148
        if (n < 3)
1035
111
          dest.insert(0, 1, '.');
1036
148
        value /= 256;
1037
      }
1038
37
      break;
1039
    }
1040
    case HostType::H_IPV6: {
1041
20
      dest.reserve(41);
1042
20
      dest += '[';
1043
20
      const uint16_t* start = &value_.ipv6[0];
1044
      const uint16_t* compress_pointer =
1045
20
          FindLongestZeroSequence(start, 8);
1046
20
      bool ignore0 = false;
1047
180
      for (int n = 0; n <= 7; n++) {
1048
160
        const uint16_t* piece = &value_.ipv6[n];
1049

160
        if (ignore0 && *piece == 0)
1050
196
          continue;
1051
71
        else if (ignore0)
1052
15
          ignore0 = false;
1053
71
        if (compress_pointer == piece) {
1054
18
          dest += n == 0 ? "::" : ":";
1055
18
          ignore0 = true;
1056
18
          continue;
1057
        }
1058
        char buf[5];
1059
53
        snprintf(buf, sizeof(buf), "%x", *piece);
1060
53
        dest += buf;
1061
53
        if (n < 7)
1062
36
          dest += ':';
1063
      }
1064
20
      dest += ']';
1065
20
      break;
1066
    }
1067
    case HostType::H_FAILED:
1068
      break;
1069
  }
1070
57
  return dest;
1071
}
1072
1073
1640
bool ParseHost(const std::string& input,
1074
               std::string* output,
1075
               bool is_special,
1076
               bool unicode = false) {
1077
1640
  if (input.length() == 0) {
1078
42
    output->clear();
1079
42
    return true;
1080
  }
1081
1598
  URLHost host;
1082
1598
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
1083
1598
  if (host.ParsingFailed())
1084
157
    return false;
1085
1441
  *output = host.ToString();
1086
1441
  return true;
1087
}
1088
1089
938
inline void Copy(Environment* env,
1090
                 Local<Array> ary,
1091
                 std::vector<std::string>* vec) {
1092
938
  const int32_t len = ary->Length();
1093
938
  if (len == 0)
1094
946
    return;  // nothing to copy
1095
930
  vec->reserve(len);
1096
2291
  for (int32_t n = 0; n < len; n++) {
1097
4083
    Local<Value> val = ary->Get(env->context(), n).ToLocalChecked();
1098
2722
    if (val->IsString()) {
1099
1361
      Utf8Value value(env->isolate(), val.As<String>());
1100
1361
      vec->push_back(std::string(*value, value.length()));
1101
    }
1102
  }
1103
}
1104
1105
2722
// Builds a new JS array holding one JS string per element of `vec`, in the
// same order.
inline Local<Array> Copy(Environment* env,
                         const std::vector<std::string>& vec) {
  Isolate* isolate = env->isolate();
  const size_t count = vec.size();
  Local<Array> ary = Array::New(isolate, count);
  for (size_t i = 0; i < count; i++) {
    Local<String> element = UTF8STRING(isolate, vec[i]);
    ary->Set(env->context(), i, element).FromJust();
  }
  return ary;
}
1113
1114
938
inline void HarvestBase(Environment* env,
1115
                        struct url_data* base,
1116
                        Local<Object> base_obj) {
1117
938
  Local<Context> context = env->context();
1118
3752
  Local<Value> flags = GET(env, base_obj, "flags");
1119
938
  if (flags->IsInt32())
1120
1876
    base->flags = flags->Int32Value(context).FromJust();
1121
1122
3752
  Local<Value> scheme = GET(env, base_obj, "scheme");
1123
938
  base->scheme = Utf8Value(env->isolate(), scheme).out();
1124
1125
6566
  GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME);
1126
6566
  GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD);
1127
6077
  GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST);
1128
5636
  GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY);
1129
5634
  GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT);
1130
3752
  Local<Value> port = GET(env, base_obj, "port");
1131
938
  if (port->IsInt32())
1132
8
    base->port = port->Int32Value(context).FromJust();
1133
3752
  Local<Value> path = GET(env, base_obj, "path");
1134
938
  if (path->IsArray()) {
1135
938
    base->flags |= URL_FLAGS_HAS_PATH;
1136
1876
    Copy(env, path.As<Array>(), &(base->path));
1137
  }
1138
938
}
1139
1140
509
inline void HarvestContext(Environment* env,
1141
                           struct url_data* context,
1142
                           Local<Object> context_obj) {
1143
2036
  Local<Value> flags = GET(env, context_obj, "flags");
1144
509
  if (flags->IsInt32()) {
1145
1527
    int32_t _flags = flags->Int32Value(env->context()).FromJust();
1146
509
    if (_flags & URL_FLAGS_SPECIAL)
1147
451
      context->flags |= URL_FLAGS_SPECIAL;
1148
509
    if (_flags & URL_FLAGS_CANNOT_BE_BASE)
1149
4
      context->flags |= URL_FLAGS_CANNOT_BE_BASE;
1150
509
    if (_flags & URL_FLAGS_HAS_USERNAME)
1151
14
      context->flags |= URL_FLAGS_HAS_USERNAME;
1152
509
    if (_flags & URL_FLAGS_HAS_PASSWORD)
1153
8
      context->flags |= URL_FLAGS_HAS_PASSWORD;
1154
509
    if (_flags & URL_FLAGS_HAS_HOST)
1155
499
      context->flags |= URL_FLAGS_HAS_HOST;
1156
  }
1157
2036
  Local<Value> scheme = GET(env, context_obj, "scheme");
1158
1018
  if (scheme->IsString()) {
1159
509
    Utf8Value value(env->isolate(), scheme);
1160
509
    context->scheme.assign(*value, value.length());
1161
  }
1162
2036
  Local<Value> port = GET(env, context_obj, "port");
1163
509
  if (port->IsInt32())
1164
63
    context->port = port->Int32Value(env->context()).FromJust();
1165
509
  if (context->flags & URL_FLAGS_HAS_USERNAME) {
1166
56
    Local<Value> username = GET(env, context_obj, "username");
1167
28
    CHECK(username->IsString());
1168
14
    Utf8Value value(env->isolate(), username);
1169
14
    context->username.assign(*value, value.length());
1170
  }
1171
509
  if (context->flags & URL_FLAGS_HAS_PASSWORD) {
1172
32
    Local<Value> password = GET(env, context_obj, "password");
1173
16
    CHECK(password->IsString());
1174
8
    Utf8Value value(env->isolate(), password);
1175
8
    context->password.assign(*value, value.length());
1176
  }
1177
2036
  Local<Value> host = GET(env, context_obj, "host");
1178
1018
  if (host->IsString()) {
1179
499
    Utf8Value value(env->isolate(), host);
1180
499
    context->host.assign(*value, value.length());
1181
  }
1182
509
}
1183
1184
// Single dot segment can be ".", "%2e", or "%2E"
1185
15577
inline bool IsSingleDotSegment(const std::string& str) {
1186
15577
  switch (str.size()) {
1187
    case 1:
1188
541
      return str == ".";
1189
    case 3:
1190
1268
      return str[0] == '%' &&
1191

1264
             str[1] == '2' &&
1192
1264
             ASCIILowercase(str[2]) == 'e';
1193
    default:
1194
13796
      return false;
1195
  }
1196
}
1197
1198
// Double dot segment can be:
1199
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
1200
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
1201
7902
inline bool IsDoubleDotSegment(const std::string& str) {
1202

7902
  switch (str.size()) {
1203
    case 2:
1204
197
      return str == "..";
1205
    case 4:
1206

1647
      if (str[0] != '.' && str[0] != '%')
1207
1640
        return false;
1208
11
      return ((str[0] == '.' &&
1209
6
               str[1] == '%' &&
1210
4
               str[2] == '2' &&
1211

19
               ASCIILowercase(str[3]) == 'e') ||
1212
8
              (str[0] == '%' &&
1213
6
               str[1] == '2' &&
1214
6
               ASCIILowercase(str[2]) == 'e' &&
1215
10
               str[3] == '.'));
1216
    case 6:
1217
174
      return (str[0] == '%' &&
1218
8
              str[1] == '2' &&
1219
6
              ASCIILowercase(str[2]) == 'e' &&
1220
4
              str[3] == '%' &&
1221

174
              str[4] == '2' &&
1222
172
              ASCIILowercase(str[5]) == 'e');
1223
    default:
1224
5888
      return false;
1225
  }
1226
}
1227
1228
346
inline void ShortenUrlPath(struct url_data* url) {
1229
346
  if (url->path.empty()) return;
1230


314
  if (url->path.size() == 1 && url->scheme == "file:" &&
1231
11
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
1232
303
  url->path.pop_back();
1233
}
1234
1235
}  // anonymous namespace
1236
1237
3850
void URL::Parse(const char* input,
1238
                size_t len,
1239
                enum url_parse_state state_override,
1240
                struct url_data* url,
1241
                bool has_url,
1242
                const struct url_data* base,
1243
                bool has_base) {
1244
3850
  const char* p = input;
1245
3850
  const char* end = input + len;
1246
1247
3850
  if (!has_url) {
1248
3351
    for (const char* ptr = p; ptr < end; ptr++) {
1249
3267
      if (IsC0ControlOrSpace(*ptr))
1250
10
        p++;
1251
      else
1252
3257
        break;
1253
    }
1254
3343
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
1255
3259
      if (IsC0ControlOrSpace(*ptr))
1256
2
        end--;
1257
      else
1258
3257
        break;
1259
    }
1260
3341
    len = end - p;
1261
  }
1262
1263
3850
  std::string whitespace_stripped;
1264
3850
  whitespace_stripped.reserve(len);
1265
116499
  for (const char* ptr = p; ptr < end; ptr++)
1266
112649
    if (!IsASCIITabOrNewline(*ptr))
1267
112585
      whitespace_stripped += *ptr;
1268
1269
3850
  input = whitespace_stripped.c_str();
1270
3850
  len = whitespace_stripped.size();
1271
3850
  p = input;
1272
3850
  end = input + len;
1273
1274
3850
  bool atflag = false;
1275
3850
  bool sbflag = false;
1276
3850
  bool uflag = false;
1277
1278
7148
  std::string buffer;
1279
3850
  url->scheme.reserve(len);
1280
3850
  url->username.reserve(len);
1281
3850
  url->password.reserve(len);
1282
3850
  url->host.reserve(len);
1283
3850
  url->path.reserve(len);
1284
3850
  url->query.reserve(len);
1285
3850
  url->fragment.reserve(len);
1286
3850
  buffer.reserve(len);
1287
1288
  // Set the initial parse state.
1289
3850
  const bool has_state_override = state_override != kUnknownState;
1290
  enum url_parse_state state = has_state_override ? state_override :
1291
3850
                                                    kSchemeStart;
1292
1293

3850
  if (state < kSchemeStart || state > kFragment) {
1294
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1295
    return;
1296
  }
1297
1298

140709
  while (p <= end) {
1299
133561
    const char ch = p < end ? p[0] : kEOL;
1300
133561
    bool special = (url->flags & URL_FLAGS_SPECIAL);
1301
    bool cannot_be_base;
1302

133561
    const bool special_back_slash = (special && ch == '\\');
1303
1304





133561
    switch (state) {
1305
      case kSchemeStart:
1306
3367
        if (IsASCIIAlpha(ch)) {
1307
2675
          buffer += ASCIILowercase(ch);
1308
2675
          state = kScheme;
1309
692
        } else if (!has_state_override) {
1310
689
          state = kNoScheme;
1311
689
          continue;
1312
        } else {
1313
3
          url->flags |= URL_FLAGS_FAILED;
1314
3
          return;
1315
        }
1316
2675
        break;
1317
      case kScheme:
1318


11721
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1319
9046
          buffer += ASCIILowercase(ch);
1320

2675
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1321

2570
          if (has_state_override && buffer.size() == 0) {
1322
            url->flags |= URL_FLAGS_TERMINATED;
1323
            return;
1324
          }
1325
2570
          buffer += ':';
1326
1327
2570
          bool new_is_special = IsSpecial(buffer);
1328
1329
2570
          if (has_state_override) {
1330

45
            if ((special != new_is_special) ||
1331
14
                ((buffer == "file:") &&
1332
4
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1333
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1334
1
                  (url->port != -1)))) {
1335
13
              url->flags |= URL_FLAGS_TERMINATED;
1336
13
              return;
1337
            }
1338
1339
            // File scheme && (host == empty or null) check left to JS-land
1340
            // as it can be done before even entering C++ binding.
1341
          }
1342
1343
2557
          url->scheme = buffer;
1344
2557
          url->port = NormalizePort(url->scheme, url->port);
1345
2557
          if (new_is_special) {
1346
1797
            url->flags |= URL_FLAGS_SPECIAL;
1347
1797
            special = true;
1348
          } else {
1349
760
            url->flags &= ~URL_FLAGS_SPECIAL;
1350
760
            special = false;
1351
          }
1352
2557
          buffer.clear();
1353
2557
          if (has_state_override)
1354
8
            return;
1355
2549
          if (url->scheme == "file:") {
1356
464
            state = kFile;
1357

3415
          } else if (special &&
1358

2593
                     has_base &&
1359
508
                     url->scheme == base->scheme) {
1360
178
            state = kSpecialRelativeOrAuthority;
1361
1907
          } else if (special) {
1362
1152
            state = kSpecialAuthoritySlashes;
1363
755
          } else if (p[1] == '/') {
1364
218
            state = kPathOrAuthority;
1365
218
            p++;
1366
          } else {
1367
537
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1368
537
            url->flags |= URL_FLAGS_HAS_PATH;
1369
537
            url->path.push_back("");
1370
537
            state = kCannotBeBase;
1371
2549
          }
1372
105
        } else if (!has_state_override) {
1373
103
          buffer.clear();
1374
103
          state = kNoScheme;
1375
103
          p = input;
1376
103
          continue;
1377
        } else {
1378
2
          url->flags |= URL_FLAGS_FAILED;
1379
2
          return;
1380
        }
1381
11595
        break;
1382
      case kNoScheme:
1383

792
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1384

792
        if (!has_base || (cannot_be_base && ch != '#')) {
1385
204
          url->flags |= URL_FLAGS_FAILED;
1386
204
          return;
1387

588
        } else if (cannot_be_base && ch == '#') {
1388
14
          url->scheme = base->scheme;
1389
14
          if (IsSpecial(url->scheme)) {
1390
            url->flags |= URL_FLAGS_SPECIAL;
1391
            special = true;
1392
          } else {
1393
14
            url->flags &= ~URL_FLAGS_SPECIAL;
1394
14
            special = false;
1395
          }
1396
14
          if (base->flags & URL_FLAGS_HAS_PATH) {
1397
14
            url->flags |= URL_FLAGS_HAS_PATH;
1398
14
            url->path = base->path;
1399
          }
1400
14
          if (base->flags & URL_FLAGS_HAS_QUERY) {
1401
2
            url->flags |= URL_FLAGS_HAS_QUERY;
1402
2
            url->query = base->query;
1403
          }
1404
14
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1405
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1406
            url->fragment = base->fragment;
1407
          }
1408
14
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1409
14
          state = kFragment;
1410

1148
        } else if (has_base &&
1411
574
                   base->scheme != "file:") {
1412
142
          state = kRelative;
1413
142
          continue;
1414
        } else {
1415
432
          url->scheme = "file:";
1416
432
          url->flags |= URL_FLAGS_SPECIAL;
1417
432
          special = true;
1418
432
          state = kFile;
1419
432
          continue;
1420
        }
1421
14
        break;
1422
      case kSpecialRelativeOrAuthority:
1423

178
        if (ch == '/' && p[1] == '/') {
1424
162
          state = kSpecialAuthorityIgnoreSlashes;
1425
162
          p++;
1426
        } else {
1427
16
          state = kRelative;
1428
16
          continue;
1429
        }
1430
162
        break;
1431
      case kPathOrAuthority:
1432
218
        if (ch == '/') {
1433
164
          state = kAuthority;
1434
        } else {
1435
54
          state = kPath;
1436
54
          continue;
1437
        }
1438
164
        break;
1439
      case kRelative:
1440
158
        url->scheme = base->scheme;
1441
158
        if (IsSpecial(url->scheme)) {
1442
115
          url->flags |= URL_FLAGS_SPECIAL;
1443
115
          special = true;
1444
        } else {
1445
43
          url->flags &= ~URL_FLAGS_SPECIAL;
1446
43
          special = false;
1447
        }
1448

158
        switch (ch) {
1449
          case kEOL:
1450
8
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1451
8
              url->flags |= URL_FLAGS_HAS_USERNAME;
1452
8
              url->username = base->username;
1453
            }
1454
8
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1455
8
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1456
8
              url->password = base->password;
1457
            }
1458
8
            if (base->flags & URL_FLAGS_HAS_HOST) {
1459
8
              url->flags |= URL_FLAGS_HAS_HOST;
1460
8
              url->host = base->host;
1461
            }
1462
8
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1463
              url->flags |= URL_FLAGS_HAS_QUERY;
1464
              url->query = base->query;
1465
            }
1466
8
            if (base->flags & URL_FLAGS_HAS_PATH) {
1467
8
              url->flags |= URL_FLAGS_HAS_PATH;
1468
8
              url->path = base->path;
1469
            }
1470
8
            url->port = base->port;
1471
8
            break;
1472
          case '/':
1473
35
            state = kRelativeSlash;
1474
35
            break;
1475
          case '?':
1476
21
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1477
21
              url->flags |= URL_FLAGS_HAS_USERNAME;
1478
21
              url->username = base->username;
1479
            }
1480
21
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1481
21
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1482
21
              url->password = base->password;
1483
            }
1484
21
            if (base->flags & URL_FLAGS_HAS_HOST) {
1485
19
              url->flags |= URL_FLAGS_HAS_HOST;
1486
19
              url->host = base->host;
1487
            }
1488
21
            if (base->flags & URL_FLAGS_HAS_PATH) {
1489
21
              url->flags |= URL_FLAGS_HAS_PATH;
1490
21
              url->path = base->path;
1491
            }
1492
21
            url->port = base->port;
1493
21
            state = kQuery;
1494
21
            break;
1495
          case '#':
1496
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1497
18
              url->flags |= URL_FLAGS_HAS_USERNAME;
1498
18
              url->username = base->username;
1499
            }
1500
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1501
18
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1502
18
              url->password = base->password;
1503
            }
1504
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1505
16
              url->flags |= URL_FLAGS_HAS_HOST;
1506
16
              url->host = base->host;
1507
            }
1508
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1509
              url->flags |= URL_FLAGS_HAS_QUERY;
1510
              url->query = base->query;
1511
            }
1512
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1513
18
              url->flags |= URL_FLAGS_HAS_PATH;
1514
18
              url->path = base->path;
1515
            }
1516
18
            url->port = base->port;
1517
18
            state = kFragment;
1518
18
            break;
1519
          default:
1520
76
            if (special_back_slash) {
1521
4
              state = kRelativeSlash;
1522
            } else {
1523
72
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1524
69
                url->flags |= URL_FLAGS_HAS_USERNAME;
1525
69
                url->username = base->username;
1526
              }
1527
72
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1528
69
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1529
69
                url->password = base->password;
1530
              }
1531
72
              if (base->flags & URL_FLAGS_HAS_HOST) {
1532
66
                url->flags |= URL_FLAGS_HAS_HOST;
1533
66
                url->host = base->host;
1534
              }
1535
72
              if (base->flags & URL_FLAGS_HAS_PATH) {
1536
72
                url->flags |= URL_FLAGS_HAS_PATH;
1537
72
                url->path = base->path;
1538
72
                ShortenUrlPath(url);
1539
              }
1540
72
              url->port = base->port;
1541
72
              state = kPath;
1542
72
              continue;
1543
            }
1544
        }
1545
86
        break;
1546
      case kRelativeSlash:
1547


39
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1548
8
          state = kSpecialAuthorityIgnoreSlashes;
1549
31
        } else if (ch == '/') {
1550
3
          state = kAuthority;
1551
        } else {
1552
28
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1553
28
            url->flags |= URL_FLAGS_HAS_USERNAME;
1554
28
            url->username = base->username;
1555
          }
1556
28
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1557
28
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1558
28
            url->password = base->password;
1559
          }
1560
28
          if (base->flags & URL_FLAGS_HAS_HOST) {
1561
26
            url->flags |= URL_FLAGS_HAS_HOST;
1562
26
            url->host = base->host;
1563
          }
1564
28
          url->port = base->port;
1565
28
          state = kPath;
1566
28
          continue;
1567
        }
1568
11
        break;
1569
      case kSpecialAuthoritySlashes:
1570
1152
        state = kSpecialAuthorityIgnoreSlashes;
1571

1152
        if (ch == '/' && p[1] == '/') {
1572
1070
          p++;
1573
        } else {
1574
82
          continue;
1575
        }
1576
1070
        break;
1577
      case kSpecialAuthorityIgnoreSlashes:
1578

1364
        if (ch != '/' && ch != '\\') {
1579
1322
          state = kAuthority;
1580
1322
          continue;
1581
        }
1582
42
        break;
1583
      case kAuthority:
1584
18225
        if (ch == '@') {
1585
132
          if (atflag) {
1586
12
            buffer.reserve(buffer.size() + 3);
1587
12
            buffer.insert(0, "%40");
1588
          }
1589
132
          atflag = true;
1590
132
          const size_t blen = buffer.size();
1591

132
          if (blen > 0 && buffer[0] != ':') {
1592
84
            url->flags |= URL_FLAGS_HAS_USERNAME;
1593
          }
1594
648
          for (size_t n = 0; n < blen; n++) {
1595
516
            const char bch = buffer[n];
1596
516
            if (bch == ':') {
1597
78
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1598
78
              if (!uflag) {
1599
76
                uflag = true;
1600
76
                continue;
1601
              }
1602
            }
1603
440
            if (uflag) {
1604
173
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1605
            } else {
1606
267
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1607
            }
1608
          }
1609
132
          buffer.clear();
1610

18093
        } else if (ch == kEOL ||
1611
16637
                   ch == '/' ||
1612
16621
                   ch == '?' ||
1613
16612
                   ch == '#' ||
1614
                   special_back_slash) {
1615

1489
          if (atflag && buffer.size() == 0) {
1616
26
            url->flags |= URL_FLAGS_FAILED;
1617
26
            return;
1618
          }
1619
1463
          p -= buffer.size() + 1;
1620
1463
          buffer.clear();
1621
1463
          state = kHost;
1622
        } else {
1623
16604
          buffer += ch;
1624
        }
1625
18199
        break;
1626
      case kHost:
1627
      case kHostname:
1628

18715
        if (has_state_override && url->scheme == "file:") {
1629
6
          state = kFileHost;
1630
6
          continue;
1631

18709
        } else if (ch == ':' && !sbflag) {
1632
374
          if (buffer.size() == 0) {
1633
10
            url->flags |= URL_FLAGS_FAILED;
1634
10
            return;
1635
          }
1636
364
          url->flags |= URL_FLAGS_HAS_HOST;
1637
364
          if (!ParseHost(buffer, &url->host, special)) {
1638
2
            url->flags |= URL_FLAGS_FAILED;
1639
2
            return;
1640
          }
1641
362
          buffer.clear();
1642
362
          state = kPort;
1643
722
          if (state_override == kHostname) {
1644
2
            return;
1645
          }
1646

18335
        } else if (ch == kEOL ||
1647
17149
                   ch == '/' ||
1648
17129
                   ch == '?' ||
1649
17116
                   ch == '#' ||
1650
                   special_back_slash) {
1651
1229
          p--;
1652

1229
          if (special && buffer.size() == 0) {
1653
8
            url->flags |= URL_FLAGS_FAILED;
1654
8
            return;
1655
          }
1656

1340
          if (has_state_override &&
1657

1243
              buffer.size() == 0 &&
1658

50
              ((url->username.size() > 0 || url->password.size() > 0) ||
1659
16
               url->port != -1)) {
1660
4
            url->flags |= URL_FLAGS_TERMINATED;
1661
4
            return;
1662
          }
1663
1217
          url->flags |= URL_FLAGS_HAS_HOST;
1664
1217
          if (!ParseHost(buffer, &url->host, special)) {
1665
145
            url->flags |= URL_FLAGS_FAILED;
1666
145
            return;
1667
          }
1668
1072
          buffer.clear();
1669
1072
          state = kPathStart;
1670
2065
          if (has_state_override) {
1671
79
            return;
1672
          }
1673
        } else {
1674
17106
          if (ch == '[')
1675
67
            sbflag = true;
1676
17106
          if (ch == ']')
1677
65
            sbflag = false;
1678
17106
          buffer += ch;
1679
        }
1680
18459
        break;
1681
      case kPort:
1682
2034
        if (IsASCIIDigit(ch)) {
1683
1657
          buffer += ch;
1684

377
        } else if (has_state_override ||
1685
129
                   ch == kEOL ||
1686
18
                   ch == '/' ||
1687
18
                   ch == '?' ||
1688
18
                   ch == '#' ||
1689
                   special_back_slash) {
1690
359
          if (buffer.size() > 0) {
1691
354
            unsigned port = 0;
1692
            // the condition port <= 0xffff prevents integer overflow
1693

1903
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1694
1549
              port = port * 10 + buffer[i] - '0';
1695
354
            if (port > 0xffff) {
1696
              // TODO(TimothyGu): This hack is currently needed for the host
1697
              // setter since it needs access to hostname if it is valid, and
1698
              // if the FAILED flag is set the entire response to JS layer
1699
              // will be empty.
1700
13
              if (state_override == kHost)
1701
1
                url->port = -1;
1702
              else
1703
12
                url->flags |= URL_FLAGS_FAILED;
1704
13
              return;
1705
            }
1706
            // the port is valid
1707
341
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1708
341
            buffer.clear();
1709
5
          } else if (has_state_override) {
1710
            // TODO(TimothyGu): Similar case as above.
1711
1
            if (state_override == kHost)
1712
1
              url->port = -1;
1713
            else
1714
              url->flags |= URL_FLAGS_TERMINATED;
1715
1
            return;
1716
          }
1717
345
          state = kPathStart;
1718
345
          continue;
1719
        } else {
1720
18
          url->flags |= URL_FLAGS_FAILED;
1721
18
          return;
1722
        }
1723
1657
        break;
1724
      case kFile:
1725
896
        url->scheme = "file:";
1726

896
        if (ch == '/' || ch == '\\') {
1727
710
          state = kFileSlash;
1728

186
        } else if (has_base && base->scheme == "file:") {
1729

179
          switch (ch) {
1730
            case kEOL:
1731
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1732
2
                url->flags |= URL_FLAGS_HAS_HOST;
1733
2
                url->host = base->host;
1734
              }
1735
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1736
2
                url->flags |= URL_FLAGS_HAS_PATH;
1737
2
                url->path = base->path;
1738
              }
1739
2
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1740
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1741
2
                url->query = base->query;
1742
              }
1743
2
              break;
1744
            case '?':
1745
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1746
2
                url->flags |= URL_FLAGS_HAS_HOST;
1747
2
                url->host = base->host;
1748
              }
1749
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1750
2
                url->flags |= URL_FLAGS_HAS_PATH;
1751
2
                url->path = base->path;
1752
              }
1753
2
              url->flags |= URL_FLAGS_HAS_QUERY;
1754
2
              url->query.clear();
1755
2
              state = kQuery;
1756
2
              break;
1757
            case '#':
1758
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1759
2
                url->flags |= URL_FLAGS_HAS_HOST;
1760
2
                url->host = base->host;
1761
              }
1762
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1763
2
                url->flags |= URL_FLAGS_HAS_PATH;
1764
2
                url->path = base->path;
1765
              }
1766
2
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1767
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1768
2
                url->query = base->query;
1769
              }
1770
2
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1771
2
              url->fragment.clear();
1772
2
              state = kFragment;
1773
2
              break;
1774
            default:
1775
173
              if (!StartsWithWindowsDriveLetter(p, end)) {
1776
164
                if (base->flags & URL_FLAGS_HAS_HOST) {
1777
164
                  url->flags |= URL_FLAGS_HAS_HOST;
1778
164
                  url->host = base->host;
1779
                }
1780
164
                if (base->flags & URL_FLAGS_HAS_PATH) {
1781
164
                  url->flags |= URL_FLAGS_HAS_PATH;
1782
164
                  url->path = base->path;
1783
                }
1784
164
                ShortenUrlPath(url);
1785
              }
1786
173
              state = kPath;
1787
173
              continue;
1788
          }
1789
        } else {
1790
7
          state = kPath;
1791
7
          continue;
1792
        }
1793
716
        break;
1794
      case kFileSlash:
1795

710
        if (ch == '/' || ch == '\\') {
1796
460
          state = kFileHost;
1797
        } else {
1798

500
          if (has_base &&
1799

497
              base->scheme == "file:" &&
1800
247
              !StartsWithWindowsDriveLetter(p, end)) {
1801
242
            if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1802
1
              url->flags |= URL_FLAGS_HAS_PATH;
1803
1
              url->path.push_back(base->path[0]);
1804
            } else {
1805
241
              if (base->flags & URL_FLAGS_HAS_HOST) {
1806
241
                url->flags |= URL_FLAGS_HAS_HOST;
1807
241
                url->host = base->host;
1808
              } else {
1809
                url->flags &= ~URL_FLAGS_HAS_HOST;
1810
                url->host.clear();
1811
              }
1812
            }
1813
          }
1814
250
          state = kPath;
1815
250
          continue;
1816
        }
1817
460
        break;
1818
      case kFileHost:
1819

819
        if (ch == kEOL ||
1820
358
            ch == '/' ||
1821
353
            ch == '\\' ||
1822
353
            ch == '?' ||
1823
            ch == '#') {
1824

1392
          if (!has_state_override &&
1825

473
              buffer.size() == 2 &&
1826
7
              IsWindowsDriveLetter(buffer)) {
1827
4
            state = kPath;
1828
462
          } else if (buffer.size() == 0) {
1829
403
            url->flags |= URL_FLAGS_HAS_HOST;
1830
403
            url->host.clear();
1831
403
            if (has_state_override)
1832
2
              return;
1833
401
            state = kPathStart;
1834
          } else {
1835
59
            std::string host;
1836
59
            if (!ParseHost(buffer, &host, special)) {
1837
10
              url->flags |= URL_FLAGS_FAILED;
1838
10
              return;
1839
            }
1840
49
            if (host == "localhost")
1841
11
              host.clear();
1842
49
            url->flags |= URL_FLAGS_HAS_HOST;
1843
49
            url->host = host;
1844
49
            if (has_state_override)
1845
2
              return;
1846
47
            buffer.clear();
1847
47
            state = kPathStart;
1848
          }
1849
452
          continue;
1850
        } else {
1851
353
          buffer += ch;
1852
        }
1853
353
        break;
1854
      case kPathStart:
1855
1974
        if (IsSpecial(url->scheme)) {
1856
1820
          state = kPath;
1857

1820
          if (ch != '/' && ch != '\\') {
1858
574
            continue;
1859
          }
1860

154
        } else if (!has_state_override && ch == '?') {
1861
3
          url->flags |= URL_FLAGS_HAS_QUERY;
1862
3
          url->query.clear();
1863
3
          state = kQuery;
1864

151
        } else if (!has_state_override && ch == '#') {
1865
3
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1866
3
          url->fragment.clear();
1867
3
          state = kFragment;
1868
148
        } else if (ch != kEOL) {
1869
117
          state = kPath;
1870
117
          if (ch != '/') {
1871
6
            continue;
1872
          }
1873
        }
1874
1394
        break;
1875
      case kPath:
1876

64272
        if (ch == kEOL ||
1877
56506
            ch == '/' ||
1878
56473
            special_back_slash ||
1879

97756
            (!has_state_override && (ch == '?' || ch == '#'))) {
1880
7902
          if (IsDoubleDotSegment(buffer)) {
1881
110
            ShortenUrlPath(url);
1882

110
            if (ch != '/' && !special_back_slash) {
1883
29
              url->flags |= URL_FLAGS_HAS_PATH;
1884
29
              url->path.push_back("");
1885
            }
1886

15677
          } else if (IsSingleDotSegment(buffer) &&
1887

7800
                     ch != '/' && !special_back_slash) {
1888
7
            url->flags |= URL_FLAGS_HAS_PATH;
1889
7
            url->path.push_back("");
1890
7785
          } else if (!IsSingleDotSegment(buffer)) {
1891

21214
            if (url->scheme == "file:" &&
1892
6705
                url->path.empty() &&
1893

8624
                buffer.size() == 2 &&
1894
36
                IsWindowsDriveLetter(buffer)) {
1895

51
              if ((url->flags & URL_FLAGS_HAS_HOST) &&
1896
16
                  !url->host.empty()) {
1897
3
                url->host.clear();
1898
3
                url->flags |= URL_FLAGS_HAS_HOST;
1899
              }
1900
35
              buffer[1] = ':';
1901
            }
1902
7699
            url->flags |= URL_FLAGS_HAS_PATH;
1903
7699
            std::string segment(buffer.c_str(), buffer.size());
1904
7699
            url->path.push_back(segment);
1905
          }
1906
7902
          buffer.clear();
1907

8955
          if (url->scheme == "file:" &&
1908
4891
              (ch == kEOL ||
1909
4879
               ch == '?' ||
1910
               ch == '#')) {
1911

2127
            while (url->path.size() > 1 && url->path[0].length() == 0) {
1912
21
              url->path.erase(url->path.begin());
1913
            }
1914
          }
1915
7902
          if (ch == '?') {
1916
92
            url->flags |= URL_FLAGS_HAS_QUERY;
1917
92
            state = kQuery;
1918
7810
          } else if (ch == '#') {
1919
11
            state = kFragment;
1920
7902
          }
1921
        } else {
1922
56370
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1923
        }
1924
64272
        break;
1925
      case kCannotBeBase:
1926
3593
        switch (ch) {
1927
          case '?':
1928
2
            state = kQuery;
1929
2
            break;
1930
          case '#':
1931
5
            state = kFragment;
1932
5
            break;
1933
          default:
1934
3586
            if (url->path.size() == 0)
1935
              url->path.push_back("");
1936

3586
            if (url->path.size() > 0 && ch != kEOL)
1937
3056
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1938
        }
1939
3593
        break;
1940
      case kQuery:
1941

2759
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1942
233
          url->flags |= URL_FLAGS_HAS_QUERY;
1943
233
          url->query = buffer;
1944
233
          buffer.clear();
1945
466
          if (ch == '#')
1946
54
            state = kFragment;
1947
        } else {
1948
2526
          AppendOrEscape(&buffer, ch, QUERY_ENCODE_SET);
1949
        }
1950
2759
        break;
1951
      case kFragment:
1952
575
        switch (ch) {
1953
          case kEOL:
1954
126
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1955
126
            url->fragment = buffer;
1956
126
            break;
1957
          case 0:
1958
2
            break;
1959
          default:
1960
447
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
1961
        }
1962
575
        break;
1963
      default:
1964
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1965
        return;
1966
    }
1967
1968
128256
    p++;
1969
3298
  }
1970
}  // NOLINT(readability/fn_size)
1971
1972
2986
// Copies the fields of `url` into the callback argument array `argv`, using
// the ARG_* constants as slot indices.
//
// The flags and protocol slots are always written. Every other slot is only
// overwritten when the matching URL_FLAGS_HAS_* bit is set in url->flags (or,
// for the port, when url->port > -1); otherwise whatever the caller stored in
// that slot beforehand is left untouched.
static inline void SetArgs(Environment* env,
1973
                           Local<Value> argv[],
1974
                           const struct url_data* url) {
1975
2986
  Isolate* isolate = env->isolate();
1976
5972
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags);
1977
5972
  argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
1978
2986
  if (url->flags & URL_FLAGS_HAS_USERNAME)
1979
434
    argv[ARG_USERNAME] = UTF8STRING(isolate, url->username);
1980
2986
  if (url->flags & URL_FLAGS_HAS_PASSWORD)
1981
426
    argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password);
1982
2986
  if (url->flags & URL_FLAGS_HAS_HOST)
1983
4668
    argv[ARG_HOST] = UTF8STRING(isolate, url->host);
1984
2986
  if (url->flags & URL_FLAGS_HAS_QUERY)
1985
470
    argv[ARG_QUERY] = UTF8STRING(isolate, url->query);
1986
2986
  if (url->flags & URL_FLAGS_HAS_FRAGMENT)
1987
244
    argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment);
1988
2986
  // The port has no HAS_* flag; a value of -1 or lower means "no port".
  if (url->port > -1)
1989
656
    argv[ARG_PORT] = Integer::New(isolate, url->port);
1990
2986
  if (url->flags & URL_FLAGS_HAS_PATH)
1991
5444
    argv[ARG_PATH] = Copy(env, url->path);
1992
2986
}
1993
1994
3212
// Runs URL::Parse on `input` and reports the outcome through a JS callback.
//
//   recv            receiver (`this`) used for both callbacks
//   input, len      the text to parse and its length
//   state_override  parser state to start in, or kUnknownState
//   base_obj        used as the base URL only when it is an object
//   context_obj     used as the existing URL context only when it is an object
//   cb              called with 9 arguments (see SetArgs) on success
//   error_cb        if it is a function, called with (flags, input) on failure
//
// Nothing is called when the parser reports URL_FLAGS_INVALID_PARSE_STATE, or
// when a state override was given and the parser set URL_FLAGS_TERMINATED.
static void Parse(Environment* env,
1995
                  Local<Value> recv,
1996
                  const char* input,
1997
                  const size_t len,
1998
                  enum url_parse_state state_override,
1999
                  Local<Value> base_obj,
2000
                  Local<Value> context_obj,
2001
                  Local<Function> cb,
2002
                  Local<Value> error_cb) {
2003
3212
  Isolate* isolate = env->isolate();
2004
3212
  Local<Context> context = env->context();
2005
3212
  HandleScope handle_scope(isolate);
2006
3195
  Context::Scope context_scope(context);
2007
2008
3212
  const bool has_context = context_obj->IsObject();
2009
3212
  const bool has_base = base_obj->IsObject();
2010
2011
6407
  struct url_data base;
2012
6407
  struct url_data url;
2013
3212
  // HarvestContext/HarvestBase are defined outside this view; presumably they
  // populate the url_data structs from the JS objects.
  if (has_context)
2014
509
    HarvestContext(env, &url, context_obj.As<Object>());
2015
3212
  if (has_base)
2016
938
    HarvestBase(env, &base, base_obj.As<Object>());
2017
2018
3212
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);

3212
  // Silent exits: neither callback is invoked for these outcomes.
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
2020
509
      ((state_override != kUnknownState) &&
2021
509
       (url.flags & URL_FLAGS_TERMINATED)))
2022
3229
    return;
2023
2024
  // Define the return value placeholders
2025
  const Local<Value> undef = Undefined(isolate);
2026
  const Local<Value> null = Null(isolate);
2027
3195
  if (!(url.flags & URL_FLAGS_FAILED)) {
2028
    Local<Value> argv[9] = {
2029
      undef,
2030
      undef,
2031
      undef,
2032
      undef,
2033
      null,  // host defaults to null
2034
      null,  // port defaults to null
2035
      undef,
2036
      null,  // query defaults to null
2037
      null,  // fragment defaults to null
2038
2919
    };
2039
2919
    SetArgs(env, argv, &url);
2040
8757
    // The callback's result is not used; FromMaybe() is given an empty handle
    // as the fallback value.
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
2041
276
  } else if (error_cb->IsFunction()) {
2042
229
    Local<Value> argv[2] = { undef, undef };
2043
458
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2044
    // NOTE(review): `len` is not passed here, so the string is presumably read
    // up to the first NUL byte of `input` - confirm that is intended.
    argv[ERR_ARG_INPUT] =
2045
      String::NewFromUtf8(env->isolate(),
2046
                          input,
2047
458
                          v8::NewStringType::kNormal).ToLocalChecked();
2048
916
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
2049
687
        .FromMaybe(Local<Value>());
2050
3195
  }
2051
}
2052
2053
3212
// JS binding (registered as "parse" in Init). Validates the arguments and
// forwards them to the Parse(Environment*, ...) overload.
//
//   args[0]  input string
//   args[1]  parser state override; only used when it is a number
//   args[2]  base: undefined, null or an object
//   args[3]  context: undefined, null or an object
//   args[4]  completion callback (function)
//   args[5]  error callback: undefined or a function
//
// NOTE(review): only 5 arguments are required by the length check, yet args[5]
// is read below; presumably V8 yields undefined for an out-of-range index -
// confirm.
static void Parse(const FunctionCallbackInfo<Value>& args) {
2054
3212
  Environment* env = Environment::GetCurrent(args);
2055
3212
  CHECK_GE(args.Length(), 5);
2056
9636
  CHECK(args[0]->IsString());  // input
2057



19065
  CHECK(args[2]->IsUndefined() ||  // base context
2058
        args[2]->IsNull() ||
2059
        args[2]->IsObject());
2060



15393
  CHECK(args[3]->IsUndefined() ||  // context
2061
        args[3]->IsNull() ||
2062
        args[3]->IsObject());
2063
6424
  CHECK(args[4]->IsFunction());  // complete callback
2064


18254
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
2065
2066
3212
  Utf8Value input(env->isolate(), args[0]);
2067
3212
  // Without a numeric args[1] the parser runs with no state override.
  enum url_parse_state state_override = kUnknownState;
2068
6424
  if (args[1]->IsNumber()) {
2069
    state_override = static_cast<enum url_parse_state>(
2070
12848
        args[1]->Uint32Value(env->context()).FromJust());
2071
  }
2072
2073
  Parse(env, args.This(),
2074
3212
        *input, input.length(),
2075
        state_override,
2076
        args[2],
2077
        args[3],
2078
        args[4].As<Function>(),
2079
9636
        args[5]);
2080
3212
}
2081
2082
22
// JS binding (registered as "encodeAuth" in Init). Takes a string in args[0],
// passes each byte of its UTF-8 form through AppendOrEscape with
// USERINFO_ENCODE_SET, and returns the resulting string.
static void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
2083
22
  Environment* env = Environment::GetCurrent(args);
2084
22
  CHECK_GE(args.Length(), 1);
2085
66
  CHECK(args[0]->IsString());
2086
22
  Utf8Value value(env->isolate(), args[0]);
2087
44
  std::string output;
2088
22
  const size_t len = value.length();
2089
22
  // Reserve the input length up front as a starting capacity for the output.
  output.reserve(len);
2090
233
  for (size_t n = 0; n < len; n++) {
2091
211
    const char ch = (*value)[n];
2092
211
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
2093
  }
2094
  args.GetReturnValue().Set(
2095
      String::NewFromUtf8(env->isolate(),
2096
                          output.c_str(),
2097
88
                          v8::NewStringType::kNormal).ToLocalChecked());
2098
22
}
2099
2100
11
// JS binding (registered as "toUSVString" in Init).
//
//   args[0]  string to process
//   args[1]  non-negative number: index of the first UTF-16 code unit to
//            examine; code units before it are copied through unchanged
//
// Returns a new string of the same length in which selected surrogate code
// units have been overwritten with kUnicodeReplacementCharacter. The
// IsUnicode* predicates are defined outside this view; presumably they
// classify lead/trail surrogates.
static void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2101
11
  Environment* env = Environment::GetCurrent(args);
2102
11
  CHECK_GE(args.Length(), 2);
2103
33
  CHECK(args[0]->IsString());
2104
22
  CHECK(args[1]->IsNumber());
2105
2106
11
  TwoByteValue value(env->isolate(), args[0]);
2107
11
  const size_t n = value.length();
2108
2109
44
  const int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2110
11
  CHECK_GE(start, 0);
2111
2112
32
  for (size_t i = start; i < n; i++) {
2113
21
    char16_t c = value[i];
2114
21
    // Code units that are not surrogates are kept as they are.
    if (!IsUnicodeSurrogate(c)) {
2115
8
      continue;
2116

13
    // A trail surrogate reached here, or any surrogate in the last position,
    // has no partner and is replaced.
    } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) {
2117
12
      value[i] = kUnicodeReplacementCharacter;
2118
    } else {
2119
1
      // i < n - 1 is guaranteed by the branch above, so i + 1 is in range.
      char16_t d = value[i + 1];
2120
1
      if (IsUnicodeTrail(d)) {
2121
        // Well-formed pair: keep both units and step over the second one.
        i++;
2122
      } else {
2123
1
        value[i] = kUnicodeReplacementCharacter;
2124
      }
2125
    }
2126
  }
2127
2128
  args.GetReturnValue().Set(
2129
      String::NewFromTwoByte(env->isolate(),
2130
11
                             *value,
2131
                             v8::NewStringType::kNormal,
2132
44
                             n).ToLocalChecked());
2133
11
}
2134
2135
222
// JS binding (registered as "domainToASCII" in Init). Parses the string in
// args[0] with URLHost::ParseHost and returns host.ToString(); returns the
// empty string when parsing fails.
static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2136
222
  Environment* env = Environment::GetCurrent(args);
2137
222
  CHECK_GE(args.Length(), 1);
2138
666
  CHECK(args[0]->IsString());
2139
222
  Utf8Value value(env->isolate(), args[0]);
2140
2141
435
  URLHost host;
2142
  // Assuming the host is used for a special scheme.
2143
222
  host.ParseHost(*value, value.length(), true);
2144
222
  // A host that cannot be parsed is reported as "" rather than as an error.
  if (host.ParsingFailed()) {
2145
27
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2146
231
    return;
2147
  }
2148
426
  std::string out = host.ToString();
2149
  args.GetReturnValue().Set(
2150
      String::NewFromUtf8(env->isolate(),
2151
                          out.c_str(),
2152
852
                          v8::NewStringType::kNormal).ToLocalChecked());
2153
}
2154
2155
201
// JS binding (registered as "domainToUnicode" in Init). Same flow as
// DomainToASCII, except that ParseHost receives an additional `true`
// argument - presumably selecting Unicode output; its meaning is defined
// with URLHost, outside this view. Returns the empty string when parsing
// fails.
static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2156
201
  Environment* env = Environment::GetCurrent(args);
2157
201
  CHECK_GE(args.Length(), 1);
2158
603
  CHECK(args[0]->IsString());
2159
201
  Utf8Value value(env->isolate(), args[0]);
2160
2161
393
  URLHost host;
2162
  // Assuming the host is used for a special scheme.
2163
201
  host.ParseHost(*value, value.length(), true, true);
2164
201
  // A host that cannot be parsed is reported as "" rather than as an error.
  if (host.ParsingFailed()) {
2165
27
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2166
210
    return;
2167
  }
2168
384
  std::string out = host.ToString();
2169
  args.GetReturnValue().Set(
2170
      String::NewFromUtf8(env->isolate(),
2171
                          out.c_str(),
2172
768
                          v8::NewStringType::kNormal).ToLocalChecked());
2173
}
2174
2175
323
// Converts this URL to a file system path string.
//
// Returns "" (no path) when:
//   - the scheme is not "file:";
//   - on non-Windows builds, the URL has a non-empty host;
//   - any path segment contains a path separator after percent-decoding;
//   - on Windows builds, the host cannot be converted by ToUnicode, or a
//     host-less path does not start with "<letter>:".
//
// Otherwise each percent-decoded segment is joined with the platform
// separator. On Windows a non-empty host produces a "\\host\..." path and a
// host-less path has its leading separator removed.
std::string URL::ToFilePath() const {
2176
323
  if (context_.scheme != "file:") {
2177
3
    return "";
2178
  }
2179
2180
#ifdef _WIN32
2181
  const char* slash = "\\";
2182
  auto is_slash = [] (char ch) {
2183
    return ch == '/' || ch == '\\';
2184
  };
2185
#else
2186
320
  const char* slash = "/";
2187
25472
  auto is_slash = [] (char ch) {
2188
    return ch == '/';
2189
25472
  };
2190

640
  // Non-Windows builds have no representation for a host in the result.
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2191
320
      context_.host.length() > 0) {
2192
1
    return "";
2193
  }
2194
#endif
2195
319
  std::string decoded_path;
2196
3041
  for (const std::string& part : context_.path) {
2197
2723
    std::string decoded = PercentDecode(part.c_str(), part.length());
2198
28194
    // A separator that only appears after decoding would change the number
    // of path segments, so such URLs are rejected.
    for (char& ch : decoded) {
2199
25472
      if (is_slash(ch)) {
2200
1
        return "";
2201
      }
2202
    }
2203
2722
    decoded_path += slash + decoded;
2204
2722
  }
2205
2206
#ifdef _WIN32
2207
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2208
2209
  // If hostname is set, then we have a UNC path. Pass the hostname through
2210
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
2211
  // need to worry about percent encoding because the URL parser will have
2212
  // already taken care of that for us. Note that this only causes IDNs with an
2213
  // appropriate `xn--` prefix to be decoded.
2214
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2215
      context_.host.length() > 0) {
2216
    std::string unicode_host;
2217
    if (!ToUnicode(context_.host, &unicode_host)) {
2218
      return "";
2219
    }
2220
    return "\\\\" + unicode_host + decoded_path;
2221
  }
2222
  // Otherwise, it's a local path that requires a drive letter.
2223
  if (decoded_path.length() < 3) {
2224
    return "";
2225
  }
2226
  // Index 0 is the leading separator, so the drive letter sits at index 1
  // and the colon at index 2.
  if (decoded_path[2] != ':' ||
2227
      !IsASCIIAlpha(decoded_path[1])) {
2228
    return "";
2229
  }
2230
  // Strip out the leading '\'.
2231
  return decoded_path.substr(1);
2232
#else
2233
318
  return decoded_path;
2234
#endif
2235
}
2236
2237
// This function works by calling out to a JS function that creates and
2238
// returns the JS URL object. Be mindful of the JS<->Native boundary
2239
// crossing that is required.
2240
67
// Returns an empty handle when this URL carries URL_FLAGS_FAILED. Otherwise
// builds the same 9-slot argument array that the parse callback receives
// (see SetArgs) and returns the result of calling the function stored in
// env->url_constructor_function() with it.
const Local<Value> URL::ToObject(Environment* env) const {
2241
67
  Isolate* isolate = env->isolate();
2242
67
  Local<Context> context = env->context();
2243
  Context::Scope context_scope(context);
2244
2245
  const Local<Value> undef = Undefined(isolate);
2246
  const Local<Value> null = Null(isolate);
2247
2248
67
  if (context_.flags & URL_FLAGS_FAILED)
2249
    return Local<Value>();
2250
2251
  // Defaults for every slot; SetArgs only overwrites the slots whose
  // component is present on the URL.
  Local<Value> argv[9] = {
2252
    undef,
2253
    undef,
2254
    undef,
2255
    undef,
2256
    null,  // host defaults to null
2257
    null,  // port defaults to null
2258
    undef,
2259
    null,  // query defaults to null
2260
    null,  // fragment defaults to null
2261
67
  };
2262
67
  SetArgs(env, argv, &context_);
2263
2264
  MaybeLocal<Value> ret;
2265
  {
2266
67
    // FatalTryCatch is declared outside this view; presumably it treats an
    // exception thrown by the constructor call as fatal - confirm.
    FatalTryCatch try_catch(env);
2267
2268
    // The SetURLConstructor method must have been called already to
2269
    // set the constructor function used below. SetURLConstructor is
2270
    // called automatically when the internal/url.js module is loaded
2271
    // during the internal/bootstrap_node.js processing.
2272
    ret = env->url_constructor_function()
2273
134
        ->Call(env->context(), undef, arraysize(argv), argv);
2274
  }
2275
2276
67
  return ret.ToLocalChecked();
2277
}
2278
2279
3348
// JS binding (registered as "setURLConstructor" in Init). Expects exactly one
// argument, a function, and stores it on the Environment; URL::ToObject later
// calls it via env->url_constructor_function().
static void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2280
3348
  Environment* env = Environment::GetCurrent(args);
2281
3348
  CHECK_EQ(args.Length(), 1);
2282
6696
  CHECK(args[0]->IsFunction());
2283
6696
  env->set_url_constructor_function(args[0].As<Function>());
2284
3348
}
2285
2286
3348
// Module initializer: attaches the binding functions of this file to `target`
// under their JS-visible names, then defines one constant on `target` for
// every entry of the FLAGS and PARSESTATES lists. The `unused` and `priv`
// parameters are not read.
static void Init(Local<Object> target,
2287
                 Local<Value> unused,
2288
                 Local<Context> context,
2289
                 void* priv) {
2290
3348
  Environment* env = Environment::GetCurrent(context);
2291
3348
  env->SetMethod(target, "parse", Parse);
2292
3348
  env->SetMethod(target, "encodeAuth", EncodeAuthSet);
2293
3348
  env->SetMethod(target, "toUSVString", ToUSVString);
2294
3348
  env->SetMethod(target, "domainToASCII", DomainToASCII);
2295
3348
  env->SetMethod(target, "domainToUnicode", DomainToUnicode);
2296
3348
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
2297
2298
// FLAGS entries carry two fields; only the name is needed for the constant.
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
2299
120528
  FLAGS(XX)
2300
#undef XX
2301
2302
// PARSESTATES entries carry the name only.
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
2303
210924
  PARSESTATES(XX)
2304
#undef XX
2305
3348
}
2306
}  // namespace url
2307
}  // namespace node
2308
2309
3391
// Registers this file as the built-in module "url", with node::url::Init as
// its initializer.
NODE_BUILTIN_MODULE_CONTEXT_AWARE(url, node::url::Init)