GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage/nodes/benchmark/out/../src/node_url.cc Lines: 1131 1160 97.5 %
Date: 2017-10-21 Branches: 1054 1183 89.1 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "node_internals.h"
3
#include "base-object.h"
4
#include "base-object-inl.h"
5
#include "node_i18n.h"
6
7
#include <string>
8
#include <vector>
9
#include <stdio.h>
10
#include <cmath>
11
12
namespace node {
13
14
using v8::Array;
15
using v8::Context;
16
using v8::Function;
17
using v8::FunctionCallbackInfo;
18
using v8::HandleScope;
19
using v8::Integer;
20
using v8::Isolate;
21
using v8::Local;
22
using v8::MaybeLocal;
23
using v8::Null;
24
using v8::Object;
25
using v8::String;
26
using v8::TryCatch;
27
using v8::Undefined;
28
using v8::Value;
29
30
// Reads the property called `name` (a one-byte C string) from `obj` in the
// current context of `env` and unwraps the result with ToLocalChecked().
// Macro arguments are parenthesized so that expression arguments expand
// safely.
#define GET(env, obj, name)                                                   \
  (obj)->Get((env)->context(),                                                \
             OneByteString((env)->isolate(), name)).ToLocalChecked()
33
34
// Reads property `name` from `obj`; when it is a string, copies its UTF-8
// form into `data->name` and ORs `flag` into `data->flags`. Non-string
// values leave `data` untouched.
#define GET_AND_SET(env, obj, name, data, flag)                               \
  {                                                                           \
    Local<Value> val = GET(env, obj, #name);                                  \
    if (val->IsString()) {                                                    \
      Utf8Value value((env)->isolate(), val.As<String>());                    \
      (data)->name = *value;                                                  \
      (data)->flags |= (flag);                                                \
    }                                                                         \
  }
43
44
// Builds a V8 string from the UTF-8 contents of `str` (anything exposing
// c_str()) and unwraps the result with ToLocalChecked().
#define UTF8STRING(isolate, str)                                              \
  String::NewFromUtf8(isolate, (str).c_str(), v8::NewStringType::kNormal)     \
    .ToLocalChecked()
47
48
namespace url {
49
50
// https://url.spec.whatwg.org/#eof-code-point
// Sentinel stored in a `char` once a parser has consumed all of its input.
// NOTE(review): where `char` is signed this compares equal to an input byte
// of 0xFF, so such a byte is indistinguishable from end-of-input in loops of
// the form `while (ch != kEOL)`.
static constexpr char kEOL = -1;
52
53
// Used in ToUSVString(). U+FFFD REPLACEMENT CHARACTER as a UTF-16 code unit.
static constexpr char16_t kUnicodeReplacementCharacter = 0xFFFD;
55
56
// https://url.spec.whatwg.org/#concept-host
// Storage for a parsed host: exactly one member is meaningful at a time.
//
// NOTE(review): the destructor is deliberately empty, so the std::string
// members are never destroyed automatically. Whoever owns the union must
// destroy the active string member explicitly, and must not write to the
// numeric members while a string member is alive.
union url_host_value {
  std::string domain;   // host is a domain name
  uint32_t ipv4;        // host is an IPv4 address
  uint16_t ipv6[8];     // host is an IPv6 address, eight 16-bit pieces
  std::string opaque;   // host is an opaque host string
  ~url_host_value() {}
};
64
65
// Discriminator for the kind of host held in a url_host.
// HOST_TYPE_FAILED is the only negative value; the remaining enumerators
// are numbered consecutively from zero.
enum url_host_type {
  HOST_TYPE_FAILED = -1,
  HOST_TYPE_DOMAIN = 0,
  HOST_TYPE_IPV4 = 1,
  HOST_TYPE_IPV6 = 2,
  HOST_TYPE_OPAQUE = 3,
};
72
73
2026
struct url_host {
74
  url_host_value value;
75
  enum url_host_type type;
76
};
77
78
// X-macro listing the url_cb_args enumerators in order.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)

// X-macro listing the url_error_cb_args enumerators in order.
// The final entry intentionally has no line continuation, so the macro no
// longer depends on the line that follows it being blank.
#define ERR_ARGS(XX)                                                          \
  XX(ERR_ARG_FLAGS)                                                           \
  XX(ERR_ARG_INPUT)

// Generated from ARGS: ARG_FLAGS == 0 through ARG_FRAGMENT == 8.
enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};

// Generated from ERR_ARGS: ERR_ARG_FLAGS == 0, ERR_ARG_INPUT == 1.
enum url_error_cb_args {
#define XX(name) name,
  ERR_ARGS(XX)
#undef XX
};
104
105
// Defines a predicate template `name(ch)` that evaluates `expr` on `ch`.
// `bits` is the minimum width, in bits, that the character type must have;
// narrower types are rejected at compile time.
#define CHAR_TEST(bits, name, expr)                                           \
  template <typename T>                                                       \
  static inline bool name(const T ch) {                                       \
    static_assert(sizeof(ch) >= (bits) / 8,                                   \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }

// Defines two overloads of `name`: one taking two characters (`ch1`, `ch2`)
// and evaluating `expr`, and one taking a std::basic_string that applies the
// test to the first two characters (false when the string is shorter than
// two characters; longer strings are NOT rejected).
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  static inline bool name(const T ch1, const T ch2) {                         \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  static inline bool name(const std::basic_string<T>& str) {                  \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be at least " #bits " bits wide");          \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               (ch >= 'A' && ch <= 'F') ||
                               (ch >= 'a' && ch <= 'f')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
                            (ch >= 'a' && ch <= 'z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))

// https://infra.spec.whatwg.org/#ascii-lowercase
// Maps 'A'..'Z' to 'a'..'z' by setting bit 0x20. Already-lowercase letters
// are unchanged by the OR, and every non-letter is returned as is.
template <typename T>
static inline T ASCIILowercase(T ch) {
  return IsASCIIAlpha(ch) ? (ch | 0x20) : ch;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
          ch == '\\' || ch == ']')

// https://url.spec.whatwg.org/#windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && ch2 == ':'))

// If a UTF-16 character is a low/trailing surrogate.
CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00)

// If a UTF-16 character is a surrogate.
CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800)

// If a UTF-16 surrogate is a low/trailing one. Only meaningful for a value
// already known to be a surrogate: it inspects bit 0x400 alone.
CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)

#undef CHAR_TEST
#undef TWO_CHAR_STRING_TEST
180
181
// Percent-encoded form of every byte value: hex[b] is "%XX" with uppercase
// hexadecimal digits. Both the strings and the pointer array are const, so
// the table cannot be modified at run time.
static const char* const hex[256] = {
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
215
216
// 256-bit membership table: byte value c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Contains 0x00-0x1F and 0x7F-0xFF.
static const uint8_t C0_CONTROL_ENCODE_SET[32] = {
  // 00-1F: all control characters
  0xFF, 0xFF, 0xFF, 0xFF,
  // 20-77: nothing
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  // 78-7F: DEL (7F)
  0x80,
  // 80-FF: every non-ASCII byte
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
282
283
// 256-bit membership table: byte value c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Contains 0x00-0x1F, 0x7F-0xFF and the characters
// space, '"', '#', '<', '>', '?', '`', '{', '}'.
static const uint8_t PATH_ENCODE_SET[32] = {
  // 00-1F: all control characters
  0xFF, 0xFF, 0xFF, 0xFF,
  // 20-27: ' ' (20), '"' (22), '#' (23)
  0x0D,
  // 28-37: nothing
  0x00, 0x00,
  // 38-3F: '<' (3C), '>' (3E), '?' (3F)
  0xD0,
  // 40-5F: nothing
  0x00, 0x00, 0x00, 0x00,
  // 60-67: '`' (60)
  0x01,
  // 68-77: nothing
  0x00, 0x00,
  // 78-7F: '{' (7B), '}' (7D), DEL (7F)
  0xA8,
  // 80-FF: every non-ASCII byte
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
349
350
// 256-bit membership table: byte value c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Contains 0x00-0x1F, 0x7F-0xFF and the characters
// space, '"', '#', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']',
// '^', '`', '{', '|', '}'.
static const uint8_t USERINFO_ENCODE_SET[32] = {
  // 00-1F: all control characters
  0xFF, 0xFF, 0xFF, 0xFF,
  // 20-27: ' ' (20), '"' (22), '#' (23)
  0x0D,
  // 28-2F: '/' (2F)
  0x80,
  // 30-37: nothing
  0x00,
  // 38-3F: ':' (3A), ';' (3B), '<' (3C), '=' (3D), '>' (3E), '?' (3F)
  0xFC,
  // 40-47: '@' (40)
  0x01,
  // 48-57: nothing
  0x00, 0x00,
  // 58-5F: '[' (5B), '\\' (5C), ']' (5D), '^' (5E)
  0x78,
  // 60-67: '`' (60)
  0x01,
  // 68-77: nothing
  0x00, 0x00,
  // 78-7F: '{' (7B), '|' (7C), '}' (7D), DEL (7F)
  0xB8,
  // 80-FF: every non-ASCII byte
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
416
417
// 256-bit membership table: byte value c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Contains 0x00-0x1F, 0x7F-0xFF and the characters
// space, '"', '#', '<', '>'.
static const uint8_t QUERY_ENCODE_SET[32] = {
  // 00-1F: all control characters
  0xFF, 0xFF, 0xFF, 0xFF,
  // 20-27: ' ' (20), '"' (22), '#' (23)
  0x0D,
  // 28-37: nothing
  0x00, 0x00,
  // 38-3F: '<' (3C), '>' (3E)
  0x50,
  // 40-77: nothing
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  // 78-7F: DEL (7F)
  0x80,
  // 80-FF: every non-ASCII byte
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
483
484
262160
// Returns whether bit `i` is set in the bit array `a`, where bit `i` lives
// in element `i / 8` at position `i % 8` (least-significant bit first).
// `a` must have at least `i / 8 + 1` elements.
static inline bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i >> 3];
  return ((byte >> (i & 7)) & 1) != 0;
}
487
488
// Appends ch to str. If ch position in encode_set is set, the ch will
489
// be percent-encoded then appended.
490
262160
static inline void AppendOrEscape(std::string* str,
491
                                  const unsigned char ch,
492
                                  const uint8_t encode_set[]) {
493
262160
  if (BitAt(encode_set, ch))
494
463
    *str += hex[ch];
495
  else
496
261697
    *str += ch;
497
262160
}
498
499
// Converts one hexadecimal digit character to its numeric value (0-15).
// Accepts '0'-'9', 'A'-'F' and 'a'-'f'; any other character yields
// static_cast<unsigned>(-1).
template <typename T>
static inline unsigned hex2bin(const T ch) {
  if (ch >= '0' && ch <= '9')
    return static_cast<unsigned>(ch - '0');
  if (ch >= 'A' && ch <= 'F')
    return static_cast<unsigned>(10 + (ch - 'A'));
  if (ch >= 'a' && ch <= 'f')
    return static_cast<unsigned>(10 + (ch - 'a'));
  return static_cast<unsigned>(-1);
}
509
510
3664
static inline void PercentDecode(const char* input,
511
                                 size_t len,
512
                                 std::string* dest) {
513
3664
  if (len == 0)
514
3666
    return;
515
3662
  dest->reserve(len);
516
3662
  const char* pointer = input;
517
3662
  const char* end = input + len;
518
519
53349
  while (pointer < end) {
520
46025
    const char ch = pointer[0];
521
46025
    const size_t remaining = end - pointer - 1;
522


91797
    if (ch != '%' || remaining < 2 ||
523
259
        (ch == '%' &&
524
516
         (!IsASCIIHexDigit(pointer[1]) ||
525
257
          !IsASCIIHexDigit(pointer[2])))) {
526
45772
      *dest += ch;
527
45772
      pointer++;
528
45772
      continue;
529
    } else {
530
253
      unsigned a = hex2bin(pointer[1]);
531
253
      unsigned b = hex2bin(pointer[2]);
532
253
      char c = static_cast<char>(a * 16 + b);
533
253
      *dest += c;
534
253
      pointer += 3;
535
    }
536
  }
537
}
538
539
// X-macro listing the schemes treated as special (trailing ':' included)
// together with each scheme's default port; -1 is used for "file:".
#define SPECIALS(XX)                                                          \
  XX("ftp:", 21)                                                              \
  XX("file:", -1)                                                             \
  XX("gopher:", 70)                                                           \
  XX("http:", 80)                                                             \
  XX("https:", 443)                                                           \
  XX("ws:", 80)                                                               \
  XX("wss:", 443)

// Returns true when `scheme` exactly equals one of the SPECIALS entries
// (the comparison is case-sensitive and expects the trailing ':').
// The argument is taken by const reference to avoid copying it per call.
static inline bool IsSpecial(const std::string& scheme) {
#define XX(name, _) if (scheme == name) return true;
  SPECIALS(XX);
#undef XX
  return false;
}
554
555
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
556
272
static inline bool StartsWithWindowsDriveLetter(const char* p,
557
                                                const char* end) {
558
272
  const size_t length = end - p;
559
269
  return length >= 2 &&
560

301
    IsWindowsDriveLetter(p[0], p[1]) &&
561
14
    (length == 2 ||
562
21
      p[2] == '/' ||
563
10
      p[2] == '\\' ||
564
5
      p[2] == '?' ||
565
274
      p[2] == '#');
566
}
567
568
6081
static inline int NormalizePort(std::string scheme, int p) {
569
#define XX(name, port) if (scheme == name && p == port) return -1;
570










6081
  SPECIALS(XX);
571
#undef XX
572
2430
  return p;
573
}
574
575
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs i18n::ToUnicode over `input` and stores the result in *output.
// Returns false, leaving *output untouched, when the conversion reports a
// negative result.
static inline bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> buf;
  if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0)
    return false;
  output->assign(*buf, buf.length());
  return true;
}

// Runs i18n::ToASCII over `input` and stores the result in *output.
// Returns false, leaving *output untouched, when the conversion reports a
// negative result.
static inline bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> buf;
  if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0)
    return false;
  output->assign(*buf, buf.length());
  return true;
}
#else
// Intentional non-ops if ICU is not present: both functions copy the input
// to the output unchanged and always succeed.
static inline bool ToUnicode(const std::string& input, std::string* output) {
  *output = input;
  return true;
}

static inline bool ToASCII(const std::string& input, std::string* output) {
  *output = input;
  return true;
}
#endif
603
604
66
// Parses the `length` bytes at `input` (the text between the brackets of an
// IPv6 literal) into the eight 16-bit pieces of host->value.ipv6.
//
// host->type is always written: HOST_TYPE_IPV6 on success and
// HOST_TYPE_FAILED on any syntax error. The same value is returned.
//
// Accepted forms, as implemented below:
//   * up to four hex digits per piece, pieces separated by ':'
//   * a single "::" compression marker
//   * a trailing dotted-decimal IPv4 section occupying the last two pieces
static url_host_type ParseIPv6Host(url_host* host,
                                   const char* input,
                                   size_t length) {
  url_host_type type = HOST_TYPE_FAILED;
  for (unsigned n = 0; n < 8; n++)
    host->value.ipv6[n] = 0;
  uint16_t* piece_pointer = &host->value.ipv6[0];
  // One past the final piece; never dereferenced.
  uint16_t* last_piece = piece_pointer + 8;
  uint16_t* compress_pointer = nullptr;
  const char* pointer = input;
  const char* end = pointer + length;
  // Declared up front because the `goto end` jumps below must not cross
  // an initialization.
  unsigned value, len, swaps, numbers_seen;
  char ch = pointer < end ? pointer[0] : kEOL;
  if (ch == ':') {
    // A leading ':' is only valid as the first half of "::".
    if (length < 2 || pointer[1] != ':')
      goto end;
    pointer += 2;
    ch = pointer < end ? pointer[0] : kEOL;
    piece_pointer++;
    compress_pointer = piece_pointer;
  }
  while (ch != kEOL) {
    // All eight pieces are already filled but input remains. This must be
    // `>=`: with `>` a ninth piece would be stored at *last_piece, one
    // element past the end of the ipv6 array.
    if (piece_pointer >= last_piece)
      goto end;
    if (ch == ':') {
      // Second ':' of a "::" marker; only one marker is allowed.
      if (compress_pointer != nullptr)
        goto end;
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      piece_pointer++;
      compress_pointer = piece_pointer;
      continue;
    }
    // Read up to four hex digits for this piece.
    value = 0;
    len = 0;
    while (len < 4 && IsASCIIHexDigit(ch)) {
      value = value * 0x10 + hex2bin(ch);
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      len++;
    }
    switch (ch) {
      case '.':
        // The digits just read were the first IPv4 number: rewind and
        // re-parse them as decimal.
        if (len == 0)
          goto end;
        pointer -= len;
        ch = pointer < end ? pointer[0] : kEOL;
        // The IPv4 section needs two free pieces.
        if (piece_pointer > last_piece - 2)
          goto end;
        numbers_seen = 0;
        while (ch != kEOL) {
          // 0xffffffff marks "no digit consumed yet" for this number.
          value = 0xffffffff;
          if (numbers_seen > 0) {
            if (ch == '.' && numbers_seen < 4) {
              pointer++;
              ch = pointer < end ? pointer[0] : kEOL;
            } else {
              goto end;
            }
          }
          if (!IsASCIIDigit(ch))
            goto end;
          while (IsASCIIDigit(ch)) {
            unsigned number = ch - '0';
            if (value == 0xffffffff) {
              value = number;
            } else if (value == 0) {
              // A further digit after a leading zero is rejected.
              goto end;
            } else {
              value = value * 10 + number;
            }
            if (value > 255)
              goto end;
            pointer++;
            ch = pointer < end ? pointer[0] : kEOL;
          }
          // Two IPv4 numbers are packed into each 16-bit piece.
          *piece_pointer = *piece_pointer * 0x100 + value;
          numbers_seen++;
          if (numbers_seen == 2 || numbers_seen == 4)
            piece_pointer++;
        }
        if (numbers_seen != 4)
          goto end;
        continue;
      case ':':
        pointer++;
        ch = pointer < end ? pointer[0] : kEOL;
        // A single trailing ':' is not allowed.
        if (ch == kEOL)
          goto end;
        break;
      case kEOL:
        break;
      default:
        goto end;
    }
    *piece_pointer = value;
    piece_pointer++;
  }

  if (compress_pointer != nullptr) {
    // Move the pieces that followed "::" to the tail of the array, leaving
    // the zero-initialized gap where the marker was.
    swaps = piece_pointer - compress_pointer;
    piece_pointer = last_piece - 1;
    while (piece_pointer != &host->value.ipv6[0] && swaps > 0) {
      uint16_t temp = *piece_pointer;
      uint16_t* swap_piece = compress_pointer + swaps - 1;
      *piece_pointer = *swap_piece;
      *swap_piece = temp;
      piece_pointer--;
      swaps--;
    }
  } else if (piece_pointer != last_piece) {
    // Without compression exactly eight pieces are required.
    goto end;
  }
  type = HOST_TYPE_IPV6;
 end:
  host->type = type;
  return type;
}
723
724
1815
static inline int64_t ParseNumber(const char* start, const char* end) {
725
1815
  unsigned R = 10;
726

1815
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
727
24
    start += 2;
728
24
    R = 16;
729
  }
730
1815
  if (end - start == 0) {
731
4
    return 0;
732

1811
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
733
32
    start++;
734
32
    R = 8;
735
  }
736
1811
  const char* p = start;
737
738
4218
  while (p < end) {
739
2262
    const char ch = p[0];
740

2262
    switch (R) {
741
      case 8:
742

173
        if (ch < '0' || ch > '7')
743
19
          return -1;
744
154
        break;
745
      case 10:
746
1985
        if (!IsASCIIDigit(ch))
747
1645
          return -1;
748
340
        break;
749
      case 16:
750
104
        if (!IsASCIIHexDigit(ch))
751
2
          return -1;
752
102
        break;
753
    }
754
596
    p++;
755
  }
756
145
  return strtoll(start, NULL, R);
757
}
758
759
1725
// Attempts to interpret the `length` bytes at `input` as an IPv4 address made
// of one to four '.'-separated numbers (each parsed by ParseNumber()).
//
// host->type is always written and the same value is returned:
//   * HOST_TYPE_IPV4   - parsed; host->value.ipv4 holds the 32-bit address
//   * HOST_TYPE_FAILED - the parts are numeric but out of range
//   * HOST_TYPE_DOMAIN - the input is not an IPv4 address (empty input, an
//                        empty part, more than four parts, or a part that
//                        ParseNumber() rejects)
//
// All arithmetic is done on integers; no floating-point pow() is involved.
static url_host_type ParseIPv4Host(url_host* host,
                                   const char* input,
                                   size_t length) {
  url_host_type type = HOST_TYPE_DOMAIN;
  const char* pointer = input;
  const char* mark = input;  // start of the part currently being scanned
  const char* end = pointer + length;
  int parts = 0;
  uint32_t val = 0;
  uint64_t numbers[4];
  int tooBigNumbers = 0;
  if (length == 0)
    goto end;

  // `pointer == end` is visited on purpose so the final part is flushed
  // through the kEOL branch.
  while (pointer <= end) {
    const char ch = pointer < end ? pointer[0] : kEOL;
    const int remaining = end - pointer - 1;
    if (ch == '.' || ch == kEOL) {
      if (++parts > 4)
        goto end;
      if (pointer == mark)
        goto end;
      int64_t n = ParseNumber(mark, pointer);
      if (n < 0)
        goto end;

      if (n > 255) {
        tooBigNumbers++;
      }
      numbers[parts - 1] = n;
      mark = pointer + 1;
      // A single trailing '.' ends the address without adding a part.
      if (ch == '.' && remaining == 0)
        break;
    }
    pointer++;
  }
  CHECK_GT(parts, 0);

  // If any but the last item in numbers is greater than 255, return failure.
  // If the last item in numbers is greater than or equal to
  // 256^(5 - the number of items in numbers), return failure.
  // 256^(5 - parts) == 1 << (8 * (5 - parts)); parts is in [1, 4] here, so
  // the shift count is in [8, 32] and fits a 64-bit operand.
  if (tooBigNumbers > 1 ||
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
      numbers[parts - 1] >= (static_cast<uint64_t>(1) << (8 * (5 - parts)))) {
    type = HOST_TYPE_FAILED;
    goto end;
  }

  type = HOST_TYPE_IPV4;
  // The last number supplies the low-order bytes; the range check above
  // guarantees it fits in the bytes not claimed by the earlier parts.
  val = static_cast<uint32_t>(numbers[parts - 1]);
  for (int n = 0; n < parts - 1; n++) {
    // Each earlier number (<= 255 at this point) occupies one byte, most
    // significant first: numbers[n] * 256^(3 - n).
    val += static_cast<uint32_t>(numbers[n]) << (8 * (3 - n));
  }

  host->value.ipv4 = val;
 end:
  host->type = type;
  return type;
}
819
820
136
// Builds an opaque host from the `length` bytes at `input`.
//
// Any byte other than '%' for which IsForbiddenHostCodePoint() is true makes
// the parse fail: host->type becomes HOST_TYPE_FAILED and host->value is not
// written. Otherwise every byte is passed through AppendOrEscape() with
// C0_CONTROL_ENCODE_SET, the result is stored in host->value.opaque and
// host->type becomes HOST_TYPE_OPAQUE. The resulting type is returned.
static url_host_type ParseOpaqueHost(url_host* host,
                                     const char* input,
                                     size_t length) {
  std::string output;
  output.reserve(length * 3);
  for (const char* cursor = input; cursor != input + length; ++cursor) {
    const char ch = *cursor;
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
      host->type = HOST_TYPE_FAILED;
      return HOST_TYPE_FAILED;
    }
    AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
  }

  host->value.opaque = output;
  host->type = HOST_TYPE_OPAQUE;
  return HOST_TYPE_OPAQUE;
}
841
842
2026
// Parses the `length` bytes at `input` as a URL host and stores the result
// in `host`. The resulting host type is written to host->type and returned.
//
//   * Empty input fails.
//   * Input starting with '[' must end with ']' and is handed to
//     ParseIPv6Host() without the brackets.
//   * For non-special schemes the input is handed to ParseOpaqueHost().
//   * Otherwise the input is percent-decoded, run through ToASCII(), checked
//     for forbidden host code points, tried as an IPv4 address and finally
//     treated as a domain (converted with ToUnicode() when `unicode` is set).
static url_host_type ParseHost(url_host* host,
                               const char* input,
                               size_t length,
                               bool is_special,
                               bool unicode = false) {
  url_host_type type = HOST_TYPE_FAILED;
  const char* pointer = input;
  std::string decoded;

  if (length == 0)
    goto end;

  if (pointer[0] == '[') {
    if (pointer[length - 1] != ']')
      goto end;
    return ParseIPv6Host(host, ++pointer, length - 2);
  }

  if (!is_special)
    return ParseOpaqueHost(host, input, length);

  // First, we have to percent decode
  PercentDecode(input, length, &decoded);

  // Then we have to punycode toASCII
  if (!ToASCII(decoded, &decoded))
    goto end;

  // If any of the following characters are still present, we have to fail
  for (size_t n = 0; n < decoded.size(); n++) {
    const char ch = decoded[n];
    if (IsForbiddenHostCodePoint(ch)) {
      goto end;
    }
  }

  // Check to see if it's an IPv4 IP address
  type = ParseIPv4Host(host, decoded.c_str(), decoded.length());
  if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED)
    goto end;

  // If the unicode flag is set, run the result through punycode ToUnicode.
  // `type` is HOST_TYPE_DOMAIN at this point, so a conversion failure has to
  // reset it explicitly; otherwise the failure would be reported as a
  // successfully parsed domain whose value was never stored.
  if (unicode && !ToUnicode(decoded, &decoded)) {
    type = HOST_TYPE_FAILED;
    goto end;
  }

  // It's not an IPv4 or IPv6 address, it must be a domain
  type = HOST_TYPE_DOMAIN;
  host->value.domain = decoded;

 end:
  host->type = type;
  return type;
}
895
896
// Finds the longest run of consecutive zero values among the `len` entries
// at `values`, for use with the :: compression when serializing an IPv6
// address. Only runs of at least two zeros qualify, and when several runs
// share the maximum length the earliest one wins. Returns a pointer to the
// first element of the chosen run, or nullptr when no run qualifies.
static inline uint16_t* FindLongestZeroSequence(uint16_t* values,
                                                size_t len) {
  uint16_t* best_start = nullptr;
  uint16_t* run_start = nullptr;
  unsigned best_length = 1;  // a run has to beat this, i.e. be >= 2 long
  unsigned run_length = 0;

  for (size_t index = 0; index < len; ++index) {
    if (values[index] != 0) {
      // The current run (if any) just ended; keep it when it is a new best.
      if (run_length > best_length) {
        best_length = run_length;
        best_start = run_start;
      }
      run_length = 0;
      run_start = nullptr;
      continue;
    }
    if (run_start == nullptr)
      run_start = values + index;
    ++run_length;
  }

  // A run that reaches the end of the array was not closed inside the loop.
  return run_length > best_length ? run_start : best_start;
}
926
927
1851
// Serializes `host` into `*dest` (which is cleared first) and returns
// host->type.
//
//   * HOST_TYPE_DOMAIN / HOST_TYPE_OPAQUE: the stored string is copied.
//   * HOST_TYPE_IPV4: dotted-decimal notation, e.g. "127.0.0.1".
//   * HOST_TYPE_IPV6: bracketed lowercase hex pieces, with the run chosen by
//     FindLongestZeroSequence() collapsed to "::".
//   * HOST_TYPE_FAILED: `*dest` is left empty.
static url_host_type WriteHost(url_host* host, std::string* dest) {
  dest->clear();
  switch (host->type) {
    case HOST_TYPE_DOMAIN:
      *dest = host->value.domain;
      break;
    case HOST_TYPE_IPV4: {
      dest->reserve(15);
      uint32_t value = host->value.ipv4;
      // Emit the least significant byte first and prepend, so the final
      // string reads most significant byte first.
      for (int n = 0; n < 4; n++) {
        char buf[4];  // at most "255" plus the terminator
        // The argument is unsigned, so the conversion must be %u.
        snprintf(buf, sizeof(buf), "%u", static_cast<unsigned>(value % 256));
        dest->insert(0, buf);
        if (n < 3)
          dest->insert(0, 1, '.');
        value /= 256;
      }
      break;
    }
    case HOST_TYPE_IPV6: {
      dest->reserve(41);
      *dest+= '[';
      uint16_t* start = &host->value.ipv6[0];
      uint16_t* compress_pointer =
          FindLongestZeroSequence(start, 8);
      // True while skipping the zero pieces covered by the "::" marker.
      bool ignore0 = false;
      for (int n = 0; n <= 7; n++) {
        uint16_t* piece = &host->value.ipv6[n];
        if (ignore0 && *piece == 0)
          continue;
        else if (ignore0)
          ignore0 = false;
        if (compress_pointer == piece) {
          // Any preceding piece already wrote one ':'; only a compression
          // at the very start needs both.
          *dest += n == 0 ? "::" : ":";
          ignore0 = true;
          continue;
        }
        char buf[5];  // at most "ffff" plus the terminator
        // %x expects an unsigned int argument.
        snprintf(buf, sizeof(buf), "%x", static_cast<unsigned>(*piece));
        *dest += buf;
        if (n < 7)
          *dest += ':';
      }
      *dest += ']';
      break;
    }
    case HOST_TYPE_OPAQUE:
      *dest = host->value.opaque;
      break;
    case HOST_TYPE_FAILED:
      break;
  }
  return host->type;
}
983
984
1645
static bool ParseHost(std::string* input,
985
                      std::string* output,
986
                      bool is_special,
987
                      bool unicode = false) {
988
1645
  if (input->length() == 0) {
989
42
    output->clear();
990
42
    return true;
991
  }
992
1603
  url_host host{{""}, HOST_TYPE_DOMAIN};
993
1603
  ParseHost(&host, input->c_str(), input->length(), is_special, unicode);
994
1603
  if (host.type == HOST_TYPE_FAILED)
995
157
    return false;
996
1446
  WriteHost(&host, output);
997
1446
  return true;
998
}
999
1000
918
static inline void Copy(Environment* env,
1001
                        Local<Array> ary,
1002
                        std::vector<std::string>* vec) {
1003
918
  const int32_t len = ary->Length();
1004
918
  if (len == 0)
1005
926
    return;  // nothing to copy
1006
910
  vec->reserve(len);
1007
2154
  for (int32_t n = 0; n < len; n++) {
1008
3732
    Local<Value> val = ary->Get(env->context(), n).ToLocalChecked();
1009
2488
    if (val->IsString()) {
1010
1244
      Utf8Value value(env->isolate(), val.As<String>());
1011
1244
      vec->push_back(std::string(*value, value.length()));
1012
    }
1013
  }
1014
}
1015
1016
2564
// Creates a JS array with one string element per entry of `vec`, in the same
// order, and returns it.
static inline Local<Array> Copy(Environment* env,
                                const std::vector<std::string>& vec) {
  Isolate* isolate = env->isolate();
  Local<Array> result = Array::New(isolate, vec.size());
  size_t index = 0;
  while (index < vec.size()) {
    const std::string& entry = vec[index];
    result->Set(env->context(), index, UTF8STRING(isolate, entry)).FromJust();
    index++;
  }
  return result;
}
1024
1025
918
static inline void HarvestBase(Environment* env,
1026
                               struct url_data* base,
1027
                               Local<Object> base_obj) {
1028
918
  Local<Context> context = env->context();
1029
3672
  Local<Value> flags = GET(env, base_obj, "flags");
1030
918
  if (flags->IsInt32())
1031
1836
    base->flags = flags->Int32Value(context).FromJust();
1032
1033
3672
  Local<Value> scheme = GET(env, base_obj, "scheme");
1034
918
  base->scheme = Utf8Value(env->isolate(), scheme).out();
1035
1036
6426
  GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME);
1037
6426
  GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD);
1038
5945
  GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST);
1039
5516
  GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY);
1040
5514
  GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT);
1041
3672
  Local<Value> port = GET(env, base_obj, "port");
1042
918
  if (port->IsInt32())
1043
8
    base->port = port->Int32Value(context).FromJust();
1044
3672
  Local<Value> path = GET(env, base_obj, "path");
1045
918
  if (path->IsArray()) {
1046
918
    base->flags |= URL_FLAGS_HAS_PATH;
1047
1836
    Copy(env, path.As<Array>(), &(base->path));
1048
  }
1049
918
}
1050
1051
455
static inline void HarvestContext(Environment* env,
1052
                                  struct url_data* context,
1053
                                  Local<Object> context_obj) {
1054
1820
  Local<Value> flags = GET(env, context_obj, "flags");
1055
455
  if (flags->IsInt32()) {
1056
1365
    int32_t _flags = flags->Int32Value(env->context()).FromJust();
1057
455
    if (_flags & URL_FLAGS_SPECIAL)
1058
397
      context->flags |= URL_FLAGS_SPECIAL;
1059
455
    if (_flags & URL_FLAGS_CANNOT_BE_BASE)
1060
4
      context->flags |= URL_FLAGS_CANNOT_BE_BASE;
1061
455
    if (_flags & URL_FLAGS_HAS_USERNAME)
1062
14
      context->flags |= URL_FLAGS_HAS_USERNAME;
1063
455
    if (_flags & URL_FLAGS_HAS_PASSWORD)
1064
8
      context->flags |= URL_FLAGS_HAS_PASSWORD;
1065
455
    if (_flags & URL_FLAGS_HAS_HOST)
1066
445
      context->flags |= URL_FLAGS_HAS_HOST;
1067
  }
1068
1820
  Local<Value> scheme = GET(env, context_obj, "scheme");
1069
910
  if (scheme->IsString()) {
1070
455
    Utf8Value value(env->isolate(), scheme);
1071
455
    context->scheme.assign(*value, value.length());
1072
  }
1073
1820
  Local<Value> port = GET(env, context_obj, "port");
1074
455
  if (port->IsInt32())
1075
63
    context->port = port->Int32Value(env->context()).FromJust();
1076
455
  if (context->flags & URL_FLAGS_HAS_USERNAME) {
1077
56
    Local<Value> username = GET(env, context_obj, "username");
1078
28
    CHECK(username->IsString());
1079
14
    Utf8Value value(env->isolate(), username);
1080
14
    context->username.assign(*value, value.length());
1081
  }
1082
455
  if (context->flags & URL_FLAGS_HAS_PASSWORD) {
1083
32
    Local<Value> password = GET(env, context_obj, "password");
1084
16
    CHECK(password->IsString());
1085
8
    Utf8Value value(env->isolate(), password);
1086
8
    context->password.assign(*value, value.length());
1087
  }
1088
1820
  Local<Value> host = GET(env, context_obj, "host");
1089
910
  if (host->IsString()) {
1090
445
    Utf8Value value(env->isolate(), host);
1091
445
    context->host.assign(*value, value.length());
1092
  }
1093
455
}
1094
1095
// Single dot segment can be ".", "%2e", or "%2E"
1096
63761
static inline bool IsSingleDotSegment(std::string str) {
1097
63761
  switch (str.size()) {
1098
    case 1:
1099
495
      return str == ".";
1100
    case 3:
1101
1296
      return str[0] == '%' &&
1102

1292
             str[1] == '2' &&
1103
1292
             ASCIILowercase(str[2]) == 'e';
1104
    default:
1105
61998
      return false;
1106
  }
1107
}
1108
1109
// Double dot segment can be:
1110
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
1111
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
1112
31986
static inline bool IsDoubleDotSegment(std::string str) {
1113

31986
  switch (str.size()) {
1114
    case 2:
1115
190
      return str == "..";
1116
    case 4:
1117

7680
      if (str[0] != '.' && str[0] != '%')
1118
7673
        return false;
1119
11
      return ((str[0] == '.' &&
1120
6
               str[1] == '%' &&
1121
4
               str[2] == '2' &&
1122

19
               ASCIILowercase(str[3]) == 'e') ||
1123
8
              (str[0] == '%' &&
1124
6
               str[1] == '2' &&
1125
6
               ASCIILowercase(str[2]) == 'e' &&
1126
10
               str[3] == '.'));
1127
    case 6:
1128
134
      return (str[0] == '%' &&
1129
8
              str[1] == '2' &&
1130
6
              ASCIILowercase(str[2]) == 'e' &&
1131
4
              str[3] == '%' &&
1132

134
              str[4] == '2' &&
1133
132
              ASCIILowercase(str[5]) == 'e');
1134
    default:
1135
23986
      return false;
1136
  }
1137
}
1138
1139
291
static inline void ShortenUrlPath(struct url_data* url) {
1140
291
  if (url->path.empty()) return;
1141


259
  if (url->path.size() == 1 && url->scheme == "file:" &&
1142
11
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
1143
248
  url->path.pop_back();
1144
}
1145
1146
6747
void URL::Parse(const char* input,
1147
                size_t len,
1148
                enum url_parse_state state_override,
1149
                struct url_data* url,
1150
                bool has_url,
1151
                const struct url_data* base,
1152
                bool has_base) {
1153
6747
  const char* p = input;
1154
6747
  const char* end = input + len;
1155
1156
6747
  if (!has_url) {
1157
6302
    for (const char* ptr = p; ptr < end; ptr++) {
1158
6232
      if (IsC0ControlOrSpace(*ptr))
1159
10
        p++;
1160
      else
1161
6222
        break;
1162
    }
1163
6294
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
1164
6224
      if (IsC0ControlOrSpace(*ptr))
1165
2
        end--;
1166
      else
1167
6222
        break;
1168
    }
1169
6292
    len = end - p;
1170
  }
1171
1172
6747
  std::string whitespace_stripped;
1173
6747
  whitespace_stripped.reserve(len);
1174
360621
  for (const char* ptr = p; ptr < end; ptr++)
1175
353874
    if (!IsASCIITabOrNewline(*ptr))
1176
353810
      whitespace_stripped += *ptr;
1177
1178
6747
  input = whitespace_stripped.c_str();
1179
6747
  len = whitespace_stripped.size();
1180
6747
  p = input;
1181
6747
  end = input + len;
1182
1183
6747
  bool atflag = false;
1184
6747
  bool sbflag = false;
1185
6747
  bool uflag = false;
1186
1187
13006
  std::string buffer;
1188
6747
  url->scheme.reserve(len);
1189
6747
  url->username.reserve(len);
1190
6747
  url->password.reserve(len);
1191
6747
  url->host.reserve(len);
1192
6747
  url->path.reserve(len);
1193
6747
  url->query.reserve(len);
1194
6747
  url->fragment.reserve(len);
1195
6747
  buffer.reserve(len);
1196
1197
  // Set the initial parse state.
1198
6747
  const bool has_state_override = state_override != kUnknownState;
1199
  enum url_parse_state state = has_state_override ? state_override :
1200
6747
                                                    kSchemeStart;
1201
1202

6747
  if (state < kSchemeStart || state > kFragment) {
1203
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1204
    return;
1205
  }
1206
1207

396970
  while (p <= end) {
1208
383964
    const char ch = p < end ? p[0] : kEOL;
1209
383964
    bool special = (url->flags & URL_FLAGS_SPECIAL);
1210
    bool cannot_be_base;
1211

383964
    const bool special_back_slash = (special && ch == '\\');
1212
1213





383964
    switch (state) {
1214
      case kSchemeStart:
1215
6318
        if (IsASCIIAlpha(ch)) {
1216
5819
          buffer += ASCIILowercase(ch);
1217
5819
          state = kScheme;
1218
499
        } else if (!has_state_override) {
1219
496
          state = kNoScheme;
1220
496
          continue;
1221
        } else {
1222
3
          url->flags |= URL_FLAGS_FAILED;
1223
3
          return;
1224
        }
1225
5819
        break;
1226
      case kScheme:
1227


24191
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1228
18372
          buffer += ASCIILowercase(ch);
1229

5819
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1230

5733
          if (has_state_override && buffer.size() == 0) {
1231
            url->flags |= URL_FLAGS_TERMINATED;
1232
            return;
1233
          }
1234
5733
          buffer += ':';
1235
1236
5733
          bool new_is_special = IsSpecial(buffer);
1237
1238
5733
          if (has_state_override) {
1239

45
            if ((special != new_is_special) ||
1240
14
                ((buffer == "file:") &&
1241
4
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1242
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1243
1
                  (url->port != -1)))) {
1244
13
              url->flags |= URL_FLAGS_TERMINATED;
1245
13
              return;
1246
            }
1247
1248
            // File scheme && (host == empty or null) check left to JS-land
1249
            // as it can be done before even entering C++ binding.
1250
          }
1251
1252
5720
          url->scheme = buffer;
1253
5720
          url->port = NormalizePort(url->scheme, url->port);
1254
5720
          if (new_is_special) {
1255
4969
            url->flags |= URL_FLAGS_SPECIAL;
1256
4969
            special = true;
1257
          } else {
1258
751
            url->flags &= ~URL_FLAGS_SPECIAL;
1259
751
            special = false;
1260
          }
1261
5720
          buffer.clear();
1262
5720
          if (has_state_override)
1263
8
            return;
1264
5712
          if (url->scheme == "file:") {
1265
3630
            state = kFile;
1266

3418
          } else if (special &&
1267

2582
                     has_base &&
1268
500
                     url->scheme == base->scheme) {
1269
178
            state = kSpecialRelativeOrAuthority;
1270
1904
          } else if (special) {
1271
1158
            state = kSpecialAuthoritySlashes;
1272
746
          } else if (p[1] == '/') {
1273
218
            state = kPathOrAuthority;
1274
218
            p++;
1275
          } else {
1276
528
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1277
528
            url->flags |= URL_FLAGS_HAS_PATH;
1278
528
            url->path.push_back("");
1279
528
            state = kCannotBeBase;
1280
5712
          }
1281
86
        } else if (!has_state_override) {
1282
84
          buffer.clear();
1283
84
          state = kNoScheme;
1284
84
          p = input;
1285
84
          continue;
1286
        } else {
1287
2
          url->flags |= URL_FLAGS_FAILED;
1288
2
          return;
1289
        }
1290
24084
        break;
1291
      case kNoScheme:
1292

580
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1293

580
        if (!has_base || (cannot_be_base && ch != '#')) {
1294
140
          url->flags |= URL_FLAGS_FAILED;
1295
140
          return;
1296

440
        } else if (cannot_be_base && ch == '#') {
1297
14
          url->scheme = base->scheme;
1298
14
          if (IsSpecial(url->scheme)) {
1299
            url->flags |= URL_FLAGS_SPECIAL;
1300
            special = true;
1301
          } else {
1302
14
            url->flags &= ~URL_FLAGS_SPECIAL;
1303
14
            special = false;
1304
          }
1305
14
          if (base->flags & URL_FLAGS_HAS_PATH) {
1306
14
            url->flags |= URL_FLAGS_HAS_PATH;
1307
14
            url->path = base->path;
1308
          }
1309
14
          if (base->flags & URL_FLAGS_HAS_QUERY) {
1310
2
            url->flags |= URL_FLAGS_HAS_QUERY;
1311
2
            url->query = base->query;
1312
          }
1313
14
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1314
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1315
            url->fragment = base->fragment;
1316
          }
1317
14
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1318
14
          state = kFragment;
1319

852
        } else if (has_base &&
1320
426
                   base->scheme != "file:") {
1321
142
          state = kRelative;
1322
142
          continue;
1323
        } else {
1324
284
          url->scheme = "file:";
1325
284
          url->flags |= URL_FLAGS_SPECIAL;
1326
284
          special = true;
1327
284
          state = kFile;
1328
284
          continue;
1329
        }
1330
14
        break;
1331
      case kSpecialRelativeOrAuthority:
1332

178
        if (ch == '/' && p[1] == '/') {
1333
162
          state = kSpecialAuthorityIgnoreSlashes;
1334
162
          p++;
1335
        } else {
1336
16
          state = kRelative;
1337
16
          continue;
1338
        }
1339
162
        break;
1340
      case kPathOrAuthority:
1341
218
        if (ch == '/') {
1342
164
          state = kAuthority;
1343
        } else {
1344
54
          state = kPath;
1345
54
          continue;
1346
        }
1347
164
        break;
1348
      case kRelative:
1349
158
        url->scheme = base->scheme;
1350
158
        if (IsSpecial(url->scheme)) {
1351
115
          url->flags |= URL_FLAGS_SPECIAL;
1352
115
          special = true;
1353
        } else {
1354
43
          url->flags &= ~URL_FLAGS_SPECIAL;
1355
43
          special = false;
1356
        }
1357

158
        switch (ch) {
1358
          case kEOL:
1359
8
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1360
8
              url->flags |= URL_FLAGS_HAS_USERNAME;
1361
8
              url->username = base->username;
1362
            }
1363
8
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1364
8
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1365
8
              url->password = base->password;
1366
            }
1367
8
            if (base->flags & URL_FLAGS_HAS_HOST) {
1368
8
              url->flags |= URL_FLAGS_HAS_HOST;
1369
8
              url->host = base->host;
1370
            }
1371
8
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1372
              url->flags |= URL_FLAGS_HAS_QUERY;
1373
              url->query = base->query;
1374
            }
1375
8
            if (base->flags & URL_FLAGS_HAS_PATH) {
1376
8
              url->flags |= URL_FLAGS_HAS_PATH;
1377
8
              url->path = base->path;
1378
            }
1379
8
            url->port = base->port;
1380
8
            break;
1381
          case '/':
1382
35
            state = kRelativeSlash;
1383
35
            break;
1384
          case '?':
1385
21
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1386
21
              url->flags |= URL_FLAGS_HAS_USERNAME;
1387
21
              url->username = base->username;
1388
            }
1389
21
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1390
21
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1391
21
              url->password = base->password;
1392
            }
1393
21
            if (base->flags & URL_FLAGS_HAS_HOST) {
1394
19
              url->flags |= URL_FLAGS_HAS_HOST;
1395
19
              url->host = base->host;
1396
            }
1397
21
            if (base->flags & URL_FLAGS_HAS_PATH) {
1398
21
              url->flags |= URL_FLAGS_HAS_PATH;
1399
21
              url->path = base->path;
1400
            }
1401
21
            url->port = base->port;
1402
21
            state = kQuery;
1403
21
            break;
1404
          case '#':
1405
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1406
18
              url->flags |= URL_FLAGS_HAS_USERNAME;
1407
18
              url->username = base->username;
1408
            }
1409
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1410
18
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1411
18
              url->password = base->password;
1412
            }
1413
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1414
16
              url->flags |= URL_FLAGS_HAS_HOST;
1415
16
              url->host = base->host;
1416
            }
1417
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1418
              url->flags |= URL_FLAGS_HAS_QUERY;
1419
              url->query = base->query;
1420
            }
1421
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1422
18
              url->flags |= URL_FLAGS_HAS_PATH;
1423
18
              url->path = base->path;
1424
            }
1425
18
            url->port = base->port;
1426
18
            state = kFragment;
1427
18
            break;
1428
          default:
1429
76
            if (special_back_slash) {
1430
4
              state = kRelativeSlash;
1431
            } else {
1432
72
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1433
69
                url->flags |= URL_FLAGS_HAS_USERNAME;
1434
69
                url->username = base->username;
1435
              }
1436
72
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1437
69
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1438
69
                url->password = base->password;
1439
              }
1440
72
              if (base->flags & URL_FLAGS_HAS_HOST) {
1441
66
                url->flags |= URL_FLAGS_HAS_HOST;
1442
66
                url->host = base->host;
1443
              }
1444
72
              if (base->flags & URL_FLAGS_HAS_PATH) {
1445
72
                url->flags |= URL_FLAGS_HAS_PATH;
1446
72
                url->path = base->path;
1447
72
                ShortenUrlPath(url);
1448
              }
1449
72
              url->port = base->port;
1450
72
              state = kPath;
1451
72
              continue;
1452
            }
1453
        }
1454
86
        break;
1455
      case kRelativeSlash:
1456


39
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1457
8
          state = kSpecialAuthorityIgnoreSlashes;
1458
31
        } else if (ch == '/') {
1459
3
          state = kAuthority;
1460
        } else {
1461
28
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1462
28
            url->flags |= URL_FLAGS_HAS_USERNAME;
1463
28
            url->username = base->username;
1464
          }
1465
28
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1466
28
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1467
28
            url->password = base->password;
1468
          }
1469
28
          if (base->flags & URL_FLAGS_HAS_HOST) {
1470
26
            url->flags |= URL_FLAGS_HAS_HOST;
1471
26
            url->host = base->host;
1472
          }
1473
28
          url->port = base->port;
1474
28
          state = kPath;
1475
28
          continue;
1476
        }
1477
11
        break;
1478
      case kSpecialAuthoritySlashes:
1479
1158
        state = kSpecialAuthorityIgnoreSlashes;
1480

1158
        if (ch == '/' && p[1] == '/') {
1481
1076
          p++;
1482
        } else {
1483
82
          continue;
1484
        }
1485
1076
        break;
1486
      case kSpecialAuthorityIgnoreSlashes:
1487

1370
        if (ch != '/' && ch != '\\') {
1488
1328
          state = kAuthority;
1489
1328
          continue;
1490
        }
1491
42
        break;
1492
      case kAuthority:
1493
18421
        if (ch == '@') {
1494
132
          if (atflag) {
1495
12
            buffer.reserve(buffer.size() + 3);
1496
12
            buffer.insert(0, "%40");
1497
          }
1498
132
          atflag = true;
1499
132
          const size_t blen = buffer.size();
1500

132
          if (blen > 0 && buffer[0] != ':') {
1501
84
            url->flags |= URL_FLAGS_HAS_USERNAME;
1502
          }
1503
648
          for (size_t n = 0; n < blen; n++) {
1504
516
            const char bch = buffer[n];
1505
516
            if (bch == ':') {
1506
78
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1507
78
              if (!uflag) {
1508
76
                uflag = true;
1509
76
                continue;
1510
              }
1511
            }
1512
440
            if (uflag) {
1513
173
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1514
            } else {
1515
267
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1516
            }
1517
          }
1518
132
          buffer.clear();
1519

18289
        } else if (ch == kEOL ||
1520
16827
                   ch == '/' ||
1521
16811
                   ch == '?' ||
1522
16802
                   ch == '#' ||
1523
                   special_back_slash) {
1524

1495
          if (atflag && buffer.size() == 0) {
1525
26
            url->flags |= URL_FLAGS_FAILED;
1526
26
            return;
1527
          }
1528
1469
          p -= buffer.size() + 1;
1529
1469
          buffer.clear();
1530
1469
          state = kHost;
1531
        } else {
1532
16794
          buffer += ch;
1533
        }
1534
18395
        break;
1535
      case kHost:
1536
      case kHostname:
1537

18779
        if (has_state_override && url->scheme == "file:") {
1538
6
          state = kFileHost;
1539
6
          continue;
1540

18773
        } else if (ch == ':' && !sbflag) {
1541
394
          if (buffer.size() == 0) {
1542
10
            url->flags |= URL_FLAGS_FAILED;
1543
10
            return;
1544
          }
1545
384
          url->flags |= URL_FLAGS_HAS_HOST;
1546
384
          if (!ParseHost(&buffer, &url->host, special)) {
1547
2
            url->flags |= URL_FLAGS_FAILED;
1548
2
            return;
1549
          }
1550
382
          buffer.clear();
1551
382
          state = kPort;
1552
762
          if (state_override == kHostname) {
1553
2
            return;
1554
          }
1555

18379
        } else if (ch == kEOL ||
1556
17207
                   ch == '/' ||
1557
17187
                   ch == '?' ||
1558
17174
                   ch == '#' ||
1559
                   special_back_slash) {
1560
1215
          p--;
1561

1215
          if (special && buffer.size() == 0) {
1562
8
            url->flags |= URL_FLAGS_FAILED;
1563
8
            return;
1564
          }
1565

1326
          if (has_state_override &&
1566

1229
              buffer.size() == 0 &&
1567

50
              ((url->username.size() > 0 || url->password.size() > 0) ||
1568
16
               url->port != -1)) {
1569
4
            url->flags |= URL_FLAGS_TERMINATED;
1570
4
            return;
1571
          }
1572
1203
          url->flags |= URL_FLAGS_HAS_HOST;
1573
1203
          if (!ParseHost(&buffer, &url->host, special)) {
1574
145
            url->flags |= URL_FLAGS_FAILED;
1575
145
            return;
1576
          }
1577
1058
          buffer.clear();
1578
1058
          state = kPathStart;
1579
2037
          if (has_state_override) {
1580
79
            return;
1581
          }
1582
        } else {
1583
17164
          if (ch == '[')
1584
67
            sbflag = true;
1585
17164
          if (ch == ']')
1586
65
            sbflag = false;
1587
17164
          buffer += ch;
1588
        }
1589
18523
        break;
1590
      case kPort:
1591
2166
        if (IsASCIIDigit(ch)) {
1592
1769
          buffer += ch;
1593

397
        } else if (has_state_override ||
1594
129
                   ch == kEOL ||
1595
18
                   ch == '/' ||
1596
18
                   ch == '?' ||
1597
18
                   ch == '#' ||
1598
                   special_back_slash) {
1599
379
          if (buffer.size() > 0) {
1600
374
            unsigned port = 0;
1601
            // the condition port <= 0xffff prevents integer overflow
1602

2035
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1603
1661
              port = port * 10 + buffer[i] - '0';
1604
374
            if (port > 0xffff) {
1605
              // TODO(TimothyGu): This hack is currently needed for the host
1606
              // setter since it needs access to hostname if it is valid, and
1607
              // if the FAILED flag is set the entire response to JS layer
1608
              // will be empty.
1609
13
              if (state_override == kHost)
1610
1
                url->port = -1;
1611
              else
1612
12
                url->flags |= URL_FLAGS_FAILED;
1613
13
              return;
1614
            }
1615
            // the port is valid
1616
361
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1617
361
            buffer.clear();
1618
5
          } else if (has_state_override) {
1619
            // TODO(TimothyGu): Similar case as above.
1620
1
            if (state_override == kHost)
1621
1
              url->port = -1;
1622
            else
1623
              url->flags |= URL_FLAGS_TERMINATED;
1624
1
            return;
1625
          }
1626
365
          state = kPathStart;
1627
365
          continue;
1628
        } else {
1629
18
          url->flags |= URL_FLAGS_FAILED;
1630
18
          return;
1631
        }
1632
1769
        break;
1633
      case kFile:
1634
3914
        url->scheme = "file:";
1635

3914
        if (ch == '/' || ch == '\\') {
1636
3775
          state = kFileSlash;
1637

139
        } else if (has_base && base->scheme == "file:") {
1638

132
          switch (ch) {
1639
            case kEOL:
1640
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1641
2
                url->flags |= URL_FLAGS_HAS_HOST;
1642
2
                url->host = base->host;
1643
              }
1644
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1645
2
                url->flags |= URL_FLAGS_HAS_PATH;
1646
2
                url->path = base->path;
1647
              }
1648
2
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1649
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1650
2
                url->query = base->query;
1651
              }
1652
2
              break;
1653
            case '?':
1654
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1655
2
                url->flags |= URL_FLAGS_HAS_HOST;
1656
2
                url->host = base->host;
1657
              }
1658
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1659
2
                url->flags |= URL_FLAGS_HAS_PATH;
1660
2
                url->path = base->path;
1661
              }
1662
2
              url->flags |= URL_FLAGS_HAS_QUERY;
1663
2
              url->query.clear();
1664
2
              state = kQuery;
1665
2
              break;
1666
            case '#':
1667
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1668
2
                url->flags |= URL_FLAGS_HAS_HOST;
1669
2
                url->host = base->host;
1670
              }
1671
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1672
2
                url->flags |= URL_FLAGS_HAS_PATH;
1673
2
                url->path = base->path;
1674
              }
1675
2
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1676
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1677
2
                url->query = base->query;
1678
              }
1679
2
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1680
2
              url->fragment.clear();
1681
2
              state = kFragment;
1682
2
              break;
1683
            default:
1684
126
              if (!StartsWithWindowsDriveLetter(p, end)) {
1685
117
                if (base->flags & URL_FLAGS_HAS_HOST) {
1686
117
                  url->flags |= URL_FLAGS_HAS_HOST;
1687
117
                  url->host = base->host;
1688
                }
1689
117
                if (base->flags & URL_FLAGS_HAS_PATH) {
1690
117
                  url->flags |= URL_FLAGS_HAS_PATH;
1691
117
                  url->path = base->path;
1692
                }
1693
117
                ShortenUrlPath(url);
1694
              }
1695
126
              state = kPath;
1696
126
              continue;
1697
          }
1698
        } else {
1699
7
          state = kPath;
1700
7
          continue;
1701
        }
1702
3781
        break;
1703
      case kFileSlash:
1704

3775
        if (ch == '/' || ch == '\\') {
1705
3626
          state = kFileHost;
1706
        } else {
1707

298
          if (has_base &&
1708

295
              base->scheme == "file:" &&
1709
146
              !StartsWithWindowsDriveLetter(p, end)) {
1710
141
            if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1711
1
              url->flags |= URL_FLAGS_HAS_PATH;
1712
1
              url->path.push_back(base->path[0]);
1713
            } else {
1714
140
              if (base->flags & URL_FLAGS_HAS_HOST) {
1715
140
                url->flags |= URL_FLAGS_HAS_HOST;
1716
140
                url->host = base->host;
1717
              } else {
1718
                url->flags &= ~URL_FLAGS_HAS_HOST;
1719
                url->host.clear();
1720
              }
1721
            }
1722
          }
1723
149
          state = kPath;
1724
149
          continue;
1725
        }
1726
3626
        break;
1727
      case kFileHost:
1728

3982
        if (ch == kEOL ||
1729
355
            ch == '/' ||
1730
350
            ch == '\\' ||
1731
350
            ch == '?' ||
1732
            ch == '#') {
1733

10890
          if (!has_state_override &&
1734

3639
              buffer.size() == 2 &&
1735
7
              IsWindowsDriveLetter(buffer)) {
1736
4
            state = kPath;
1737
3628
          } else if (buffer.size() == 0) {
1738
3570
            url->flags |= URL_FLAGS_HAS_HOST;
1739
3570
            url->host.clear();
1740
3570
            if (has_state_override)
1741
2
              return;
1742
3568
            state = kPathStart;
1743
          } else {
1744
58
            std::string host;
1745
58
            if (!ParseHost(&buffer, &host, special)) {
1746
10
              url->flags |= URL_FLAGS_FAILED;
1747
10
              return;
1748
            }
1749
48
            if (host == "localhost")
1750
11
              host.clear();
1751
48
            url->flags |= URL_FLAGS_HAS_HOST;
1752
48
            url->host = host;
1753
48
            if (has_state_override)
1754
2
              return;
1755
46
            buffer.clear();
1756
46
            state = kPathStart;
1757
          }
1758
3618
          continue;
1759
        } else {
1760
350
          buffer += ch;
1761
        }
1762
350
        break;
1763
      case kPathStart:
1764
5097
        if (IsSpecial(url->scheme)) {
1765
4943
          state = kPath;
1766

4943
          if (ch != '/' && ch != '\\') {
1767
541
            continue;
1768
          }
1769

154
        } else if (!has_state_override && ch == '?') {
1770
3
          url->flags |= URL_FLAGS_HAS_QUERY;
1771
3
          url->query.clear();
1772
3
          state = kQuery;
1773

151
        } else if (!has_state_override && ch == '#') {
1774
3
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1775
3
          url->fragment.clear();
1776
3
          state = kFragment;
1777
148
        } else if (ch != kEOL) {
1778
117
          state = kPath;
1779
117
          if (ch != '/') {
1780
6
            continue;
1781
          }
1782
        }
1783
4550
        break;
1784
      case kPath:
1785

286880
        if (ch == kEOL ||
1786
255022
            ch == '/' ||
1787
254989
            special_back_slash ||
1788

499592
            (!has_state_override && (ch == '?' || ch == '#'))) {
1789
31986
          if (IsDoubleDotSegment(buffer)) {
1790
102
            ShortenUrlPath(url);
1791

102
            if (ch != '/' && !special_back_slash) {
1792
29
              url->flags |= URL_FLAGS_HAS_PATH;
1793
29
              url->path.push_back("");
1794
            }
1795

95722
          } else if (IsSingleDotSegment(buffer) &&
1796

95660
                     ch != '/' && !special_back_slash) {
1797
7
            url->flags |= URL_FLAGS_HAS_PATH;
1798
7
            url->path.push_back("");
1799
31877
          } else if (!IsSingleDotSegment(buffer)) {
1800

93553
            if (url->scheme == "file:" &&
1801
33830
                url->path.empty() &&
1802

35755
                buffer.size() == 2 &&
1803
36
                IsWindowsDriveLetter(buffer)) {
1804

51
              if ((url->flags & URL_FLAGS_HAS_HOST) &&
1805
16
                  !url->host.empty()) {
1806
3
                url->host.clear();
1807
3
                url->flags |= URL_FLAGS_HAS_HOST;
1808
              }
1809
35
              buffer[1] = ':';
1810
            }
1811
31814
            url->flags |= URL_FLAGS_HAS_PATH;
1812
31814
            std::string segment(buffer.c_str(), buffer.size());
1813
31814
            url->path.push_back(segment);
1814
          }
1815
31986
          buffer.clear();
1816

36008
          if (url->scheme == "file:" &&
1817
26000
              (ch == kEOL ||
1818
25988
               ch == '?' ||
1819
               ch == '#')) {
1820

8065
            while (url->path.size() > 1 && url->path[0].length() == 0) {
1821
21
              url->path.erase(url->path.begin());
1822
            }
1823
          }
1824
31986
          if (ch == '?') {
1825
84
            url->flags |= URL_FLAGS_HAS_QUERY;
1826
84
            state = kQuery;
1827
31902
          } else if (ch == '#') {
1828
11
            state = kFragment;
1829
31986
          }
1830
        } else {
1831
254894
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
1832
        }
1833
286880
        break;
1834
      case kCannotBeBase:
1835
3542
        switch (ch) {
1836
          case '?':
1837
2
            state = kQuery;
1838
2
            break;
1839
          case '#':
1840
5
            state = kFragment;
1841
5
            break;
1842
          default:
1843
3535
            if (url->path.size() == 0)
1844
              url->path.push_back("");
1845

3535
            if (url->path.size() > 0 && ch != kEOL)
1846
3014
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
1847
        }
1848
3542
        break;
1849
      case kQuery:
1850

2727
        if (ch == kEOL || (!has_state_override && ch == '#')) {
1851
225
          url->flags |= URL_FLAGS_HAS_QUERY;
1852
225
          url->query = buffer;
1853
225
          buffer.clear();
1854
450
          if (ch == '#')
1855
46
            state = kFragment;
1856
        } else {
1857
2502
          AppendOrEscape(&buffer, ch, QUERY_ENCODE_SET);
1858
        }
1859
2727
        break;
1860
      case kFragment:
1861
471
        switch (ch) {
1862
          case kEOL:
1863
113
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1864
113
            url->fragment = buffer;
1865
113
            break;
1866
          case 0:
1867
2
            break;
1868
          default:
1869
356
            AppendOrEscape(&buffer, ch, C0_CONTROL_ENCODE_SET);
1870
        }
1871
471
        break;
1872
      default:
1873
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1874
        return;
1875
    }
1876
1877
376072
    p++;
1878
6259
  }
1879
}  // NOLINT(readability/fn_size)
1880
1881
2823
static inline void SetArgs(Environment* env,
1882
                           Local<Value> argv[],
1883
                           const struct url_data* url) {
1884
2823
  Isolate* isolate = env->isolate();
1885
5646
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags);
1886
5646
  argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
1887
2823
  if (url->flags & URL_FLAGS_HAS_USERNAME)
1888
434
    argv[ARG_USERNAME] = UTF8STRING(isolate, url->username);
1889
2823
  if (url->flags & URL_FLAGS_HAS_PASSWORD)
1890
426
    argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password);
1891
2823
  if (url->flags & URL_FLAGS_HAS_HOST)
1892
4358
    argv[ARG_HOST] = UTF8STRING(isolate, url->host);
1893
2823
  if (url->flags & URL_FLAGS_HAS_QUERY)
1894
454
    argv[ARG_QUERY] = UTF8STRING(isolate, url->query);
1895
2823
  if (url->flags & URL_FLAGS_HAS_FRAGMENT)
1896
218
    argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment);
1897
2823
  if (url->port > -1)
1898
704
    argv[ARG_PORT] = Integer::New(isolate, url->port);
1899
2823
  if (url->flags & URL_FLAGS_HAS_PATH)
1900
5128
    argv[ARG_PATH] = Copy(env, url->path);
1901
2823
}
1902
1903
3069
static void Parse(Environment* env,
1904
                  Local<Value> recv,
1905
                  const char* input,
1906
                  const size_t len,
1907
                  enum url_parse_state state_override,
1908
                  Local<Value> base_obj,
1909
                  Local<Value> context_obj,
1910
                  Local<Function> cb,
1911
                  Local<Value> error_cb) {
1912
3069
  Isolate* isolate = env->isolate();
1913
3069
  Local<Context> context = env->context();
1914
3069
  HandleScope handle_scope(isolate);
1915
3052
  Context::Scope context_scope(context);
1916
1917
3069
  const bool has_context = context_obj->IsObject();
1918
3069
  const bool has_base = base_obj->IsObject();
1919
1920
6121
  struct url_data base;
1921
6121
  struct url_data url;
1922
3069
  if (has_context)
1923
455
    HarvestContext(env, &url, context_obj.As<Object>());
1924
3069
  if (has_base)
1925
918
    HarvestBase(env, &base, base_obj.As<Object>());
1926
1927
3069
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
1928

3069
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1929
455
      ((state_override != kUnknownState) &&
1930
455
       (url.flags & URL_FLAGS_TERMINATED)))
1931
3086
    return;
1932
1933
  // Define the return value placeholders
1934
  const Local<Value> undef = Undefined(isolate);
1935
  const Local<Value> null = Null(isolate);
1936
3052
  if (!(url.flags & URL_FLAGS_FAILED)) {
1937
    Local<Value> argv[9] = {
1938
      undef,
1939
      undef,
1940
      undef,
1941
      undef,
1942
      null,  // host defaults to null
1943
      null,  // port defaults to null
1944
      undef,
1945
      null,  // query defaults to null
1946
      null,  // fragment defaults to null
1947
2779
    };
1948
2779
    SetArgs(env, argv, &url);
1949
8337
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
1950
273
  } else if (error_cb->IsFunction()) {
1951
226
    Local<Value> argv[2] = { undef, undef };
1952
452
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
1953
    argv[ERR_ARG_INPUT] =
1954
      String::NewFromUtf8(env->isolate(),
1955
                          input,
1956
452
                          v8::NewStringType::kNormal).ToLocalChecked();
1957
904
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
1958
678
        .FromMaybe(Local<Value>());
1959
3052
  }
1960
}
1961
1962
3069
static void Parse(const FunctionCallbackInfo<Value>& args) {
1963
3069
  Environment* env = Environment::GetCurrent(args);
1964
3069
  CHECK_GE(args.Length(), 5);
1965
9207
  CHECK(args[0]->IsString());  // input
1966



18231
  CHECK(args[2]->IsUndefined() ||  // base context
1967
        args[2]->IsNull() ||
1968
        args[2]->IsObject());
1969



14551
  CHECK(args[3]->IsUndefined() ||  // context
1970
        args[3]->IsNull() ||
1971
        args[3]->IsObject());
1972
6138
  CHECK(args[4]->IsFunction());  // complete callback
1973


17504
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
1974
1975
3069
  Utf8Value input(env->isolate(), args[0]);
1976
3069
  enum url_parse_state state_override = kUnknownState;
1977
6138
  if (args[1]->IsNumber()) {
1978
    state_override = static_cast<enum url_parse_state>(
1979
12276
        args[1]->Uint32Value(env->context()).FromJust());
1980
  }
1981
1982
  Parse(env, args.This(),
1983
3069
        *input, input.length(),
1984
        state_override,
1985
        args[2],
1986
        args[3],
1987
        args[4].As<Function>(),
1988
9207
        args[5]);
1989
3069
}
1990
1991
22
static void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
1992
22
  Environment* env = Environment::GetCurrent(args);
1993
22
  CHECK_GE(args.Length(), 1);
1994
66
  CHECK(args[0]->IsString());
1995
22
  Utf8Value value(env->isolate(), args[0]);
1996
44
  std::string output;
1997
22
  const size_t len = value.length();
1998
22
  output.reserve(len);
1999
233
  for (size_t n = 0; n < len; n++) {
2000
211
    const char ch = (*value)[n];
2001
211
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
2002
  }
2003
  args.GetReturnValue().Set(
2004
      String::NewFromUtf8(env->isolate(),
2005
                          output.c_str(),
2006
88
                          v8::NewStringType::kNormal).ToLocalChecked());
2007
22
}
2008
2009
11
static void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2010
11
  Environment* env = Environment::GetCurrent(args);
2011
11
  CHECK_GE(args.Length(), 2);
2012
33
  CHECK(args[0]->IsString());
2013
22
  CHECK(args[1]->IsNumber());
2014
2015
11
  TwoByteValue value(env->isolate(), args[0]);
2016
11
  const size_t n = value.length();
2017
2018
44
  const int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2019
11
  CHECK_GE(start, 0);
2020
2021
32
  for (size_t i = start; i < n; i++) {
2022
21
    char16_t c = value[i];
2023
21
    if (!IsUnicodeSurrogate(c)) {
2024
8
      continue;
2025

13
    } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) {
2026
12
      value[i] = kUnicodeReplacementCharacter;
2027
    } else {
2028
1
      char16_t d = value[i + 1];
2029
1
      if (IsUnicodeTrail(d)) {
2030
        i++;
2031
      } else {
2032
1
        value[i] = kUnicodeReplacementCharacter;
2033
      }
2034
    }
2035
  }
2036
2037
  args.GetReturnValue().Set(
2038
      String::NewFromTwoByte(env->isolate(),
2039
11
                             *value,
2040
                             v8::NewStringType::kNormal,
2041
44
                             n).ToLocalChecked());
2042
11
}
2043
2044
222
static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2045
222
  Environment* env = Environment::GetCurrent(args);
2046
222
  CHECK_GE(args.Length(), 1);
2047
666
  CHECK(args[0]->IsString());
2048
222
  Utf8Value value(env->isolate(), args[0]);
2049
2050
435
  url_host host{{""}, HOST_TYPE_DOMAIN};
2051
  // Assuming the host is used for a special scheme.
2052
222
  ParseHost(&host, *value, value.length(), true);
2053
222
  if (host.type == HOST_TYPE_FAILED) {
2054
27
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2055
231
    return;
2056
  }
2057
426
  std::string out;
2058
213
  WriteHost(&host, &out);
2059
  args.GetReturnValue().Set(
2060
      String::NewFromUtf8(env->isolate(),
2061
                          out.c_str(),
2062
852
                          v8::NewStringType::kNormal).ToLocalChecked());
2063
}
2064
2065
201
static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2066
201
  Environment* env = Environment::GetCurrent(args);
2067
201
  CHECK_GE(args.Length(), 1);
2068
603
  CHECK(args[0]->IsString());
2069
201
  Utf8Value value(env->isolate(), args[0]);
2070
2071
393
  url_host host{{""}, HOST_TYPE_DOMAIN};
2072
  // Assuming the host is used for a special scheme.
2073
201
  ParseHost(&host, *value, value.length(), true, true);
2074
201
  if (host.type == HOST_TYPE_FAILED) {
2075
27
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2076
210
    return;
2077
  }
2078
384
  std::string out;
2079
192
  WriteHost(&host, &out);
2080
  args.GetReturnValue().Set(
2081
      String::NewFromUtf8(env->isolate(),
2082
                          out.c_str(),
2083
768
                          v8::NewStringType::kNormal).ToLocalChecked());
2084
}
2085
2086
226
std::string URL::ToFilePath() const {
2087
226
  if (context_.scheme != "file:") {
2088
1
    return "";
2089
  }
2090
2091
#ifdef _WIN32
2092
  const char* slash = "\\";
2093
  auto is_slash = [] (char ch) {
2094
    return ch == '/' || ch == '\\';
2095
  };
2096
#else
2097
225
  const char* slash = "/";
2098
16610
  auto is_slash = [] (char ch) {
2099
    return ch == '/';
2100
16610
  };
2101

450
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2102
225
      context_.host.length() > 0) {
2103
1
    return "";
2104
  }
2105
#endif
2106
224
  std::string decoded_path;
2107
2067
  for (const std::string& part : context_.path) {
2108
1844
    std::string decoded;
2109
1844
    PercentDecode(part.c_str(), part.length(), &decoded);
2110
18453
    for (char& ch : decoded) {
2111
16610
      if (is_slash(ch)) {
2112
1
        return "";
2113
      }
2114
    }
2115
1843
    decoded_path += slash + decoded;
2116
1843
  }
2117
2118
#ifdef _WIN32
2119
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2120
2121
  // If hostname is set, then we have a UNC path. Pass the hostname through
2122
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
2123
  // need to worry about percent encoding because the URL parser will have
2124
  // already taken care of that for us. Note that this only causes IDNs with an
2125
  // appropriate `xn--` prefix to be decoded.
2126
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2127
      context_.host.length() > 0) {
2128
    std::string unicode_host;
2129
    if (!ToUnicode(context_.host, &unicode_host)) {
2130
      return "";
2131
    }
2132
    return "\\\\" + unicode_host + decoded_path;
2133
  }
2134
  // Otherwise, it's a local path that requires a drive letter.
2135
  if (decoded_path.length() < 3) {
2136
    return "";
2137
  }
2138
  if (decoded_path[2] != ':' ||
2139
      !IsASCIIAlpha(decoded_path[1])) {
2140
    return "";
2141
  }
2142
  // Strip out the leading '\'.
2143
  return decoded_path.substr(1);
2144
#else
2145
223
  return decoded_path;
2146
#endif
2147
}
2148
2149
// This function works by calling out to a JS function that creates and
2150
// returns the JS URL object. Be mindful of the JS<->Native boundary
2151
// crossing that is required.
2152
44
const Local<Value> URL::ToObject(Environment* env) const {
2153
44
  Isolate* isolate = env->isolate();
2154
44
  Local<Context> context = env->context();
2155
  Context::Scope context_scope(context);
2156
2157
  const Local<Value> undef = Undefined(isolate);
2158
  const Local<Value> null = Null(isolate);
2159
2160
44
  if (context_.flags & URL_FLAGS_FAILED)
2161
    return Local<Value>();
2162
2163
  Local<Value> argv[9] = {
2164
    undef,
2165
    undef,
2166
    undef,
2167
    undef,
2168
    null,  // host defaults to null
2169
    null,  // port defaults to null
2170
    undef,
2171
    null,  // query defaults to null
2172
    null,  // fragment defaults to null
2173
44
  };
2174
44
  SetArgs(env, argv, &context_);
2175
2176
88
  TryCatch try_catch(isolate);
2177
2178
  // The SetURLConstructor method must have been called already to
2179
  // set the constructor function used below. SetURLConstructor is
2180
  // called automatically when the internal/url.js module is loaded
2181
  // during the internal/bootstrap_node.js processing.
2182
  MaybeLocal<Value> ret =
2183
      env->url_constructor_function()
2184
88
          ->Call(env->context(), undef, 9, argv);
2185
2186
44
  if (ret.IsEmpty()) {
2187
    ClearFatalExceptionHandlers(env);
2188
    FatalException(isolate, try_catch);
2189
  }
2190
2191
  return ret.ToLocalChecked();
2192
}
2193
2194
3200
static void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2195
3200
  Environment* env = Environment::GetCurrent(args);
2196
3200
  CHECK_EQ(args.Length(), 1);
2197
6400
  CHECK(args[0]->IsFunction());
2198
6400
  env->set_url_constructor_function(args[0].As<Function>());
2199
3200
}
2200
2201
3200
static void Init(Local<Object> target,
2202
                 Local<Value> unused,
2203
                 Local<Context> context,
2204
                 void* priv) {
2205
3200
  Environment* env = Environment::GetCurrent(context);
2206
3200
  env->SetMethod(target, "parse", Parse);
2207
3200
  env->SetMethod(target, "encodeAuth", EncodeAuthSet);
2208
3200
  env->SetMethod(target, "toUSVString", ToUSVString);
2209
3200
  env->SetMethod(target, "domainToASCII", DomainToASCII);
2210
3200
  env->SetMethod(target, "domainToUnicode", DomainToUnicode);
2211
3200
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
2212
2213
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
2214
115200
  FLAGS(XX)
2215
#undef XX
2216
2217
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
2218
201600
  PARSESTATES(XX)
2219
#undef XX
2220
3200
}
2221
}  // namespace url
2222
}  // namespace node
2223
2224
3246
// Hands node::url::Init to the built-in module registration macro under the
// module name `url`. NOTE(review): the macro is defined outside this file;
// presumably it makes Init run when the `url` binding is loaded — confirm.
NODE_MODULE_CONTEXT_AWARE_BUILTIN(url, node::url::Init)