GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/node_url.cc Lines: 1149 1183 97.1 %
Date: 2019-09-26 22:31:05 Branches: 1068 1194 89.4 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_i18n.h"
5
#include "util-inl.h"
6
7
#include <cmath>
8
#include <cstdio>
9
#include <string>
10
#include <vector>
11
12
namespace node {
13
14
using errors::TryCatchScope;
15
16
using v8::Array;
17
using v8::Context;
18
using v8::Function;
19
using v8::FunctionCallbackInfo;
20
using v8::HandleScope;
21
using v8::Int32;
22
using v8::Integer;
23
using v8::Isolate;
24
using v8::Local;
25
using v8::MaybeLocal;
26
using v8::NewStringType;
27
using v8::Null;
28
using v8::Object;
29
using v8::String;
30
using v8::Undefined;
31
using v8::Value;
32
33
165345
// Builds a V8 string from the UTF-8 bytes held in `str`.
// The byte count is passed explicitly (str.length()) and the result of
// String::NewFromUtf8() is unwrapped with ToLocalChecked().
inline Local<String> Utf8String(Isolate* isolate, const std::string& str) {
34
  return String::NewFromUtf8(isolate,
35
                             str.data(),
36
                             NewStringType::kNormal,
37
330690
                             str.length()).ToLocalChecked();
38
}
39
40
namespace url {
41
42
namespace {
43
44
// https://url.spec.whatwg.org/#eof-code-point
45
// NOTE(review): this is a plain `char`; on targets where `char` is unsigned
// the initializer converts to 255 rather than -1 -- confirm that every
// comparison against kEOL uses a `char`-typed operand.
const char kEOL = -1;
46
47
// Used in ToUSVString().
48
// U+FFFD expressed as a single UTF-16 code unit.
const char16_t kUnicodeReplacementCharacter = 0xFFFD;
49
50
// https://url.spec.whatwg.org/#concept-host
51
2926
// Holds the result of parsing a URL host.
// `type_` records which member of the `value_` union is currently live;
// it starts out (and is reset to) H_FAILED, which ParsingFailed() reports.
// The std::string union member is constructed with placement new in
// SetOpaque()/SetDomain() and destroyed explicitly in Reset().
class URLHost {
52
 public:
53
  ~URLHost();
54
55
  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
56
  void ParseIPv6Host(const char* input, size_t length);
57
  void ParseOpaqueHost(const char* input, size_t length);
58
  void ParseHost(const char* input,
59
                 size_t length,
60
                 bool is_special,
61
                 bool unicode = false);
62
63
2926
  // True while no host value is stored (type_ is H_FAILED).
  inline bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
64
  std::string ToString() const;
65
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
66
  std::string ToStringMove();
67
68
 private:
69
  // Discriminator for `value_`.
  enum class HostType {
70
    H_FAILED,
71
    H_DOMAIN,
72
    H_IPV4,
73
    H_IPV6,
74
    H_OPAQUE,
75
  };
76
77
  union Value {
78
    std::string domain_or_opaque;
79
    uint32_t ipv4;
80
    uint16_t ipv6[8];
81
82
2926
    // Intentionally empty: the string member, when live, is destroyed by
    // URLHost::Reset() based on `type_`.
    ~Value() {}
83
2926
    // Starts with the `ipv4` member initialized to zero.
    Value() : ipv4(0) {}
84
  };
85
86
  Value value_;
87
  HostType type_ = HostType::H_FAILED;
88
89
8223
  // Destroys the string member if the current type is H_DOMAIN or H_OPAQUE,
  // then marks the host as H_FAILED.
  inline void Reset() {
90
    // Alias needed so the explicit destructor call below can name the type.
    using string = std::string;
91
8223
    switch (type_) {
92
      case HostType::H_DOMAIN:
93
      case HostType::H_OPAQUE:
94
2605
        value_.domain_or_opaque.~string();
95
2605
        break;
96
      default:
97
5618
        break;
98
    }
99
8223
    type_ = HostType::H_FAILED;
100
8223
  }
101
102
  // Setting the string members of the union with = is brittle because
103
  // it relies on them being initialized to a state that requires no
104
  // destruction of old data.
105
  // For a long time, that worked well enough because ParseIPv6Host() happens
106
  // to zero-fill `value_`, but that really is relying on standard library
107
  // internals too much.
108
  // These helpers are the easiest solution but we might want to consider
109
  // just not forcing strings into a union.
110
127
  // Resets any previous value, then move-constructs `string` into the union
  // and tags the host as H_OPAQUE.
  inline void SetOpaque(std::string&& string) {
111
127
    Reset();
112
127
    type_ = HostType::H_OPAQUE;
113
127
    new(&value_.domain_or_opaque) std::string(std::move(string));
114
127
  }
115
116
2478
  // Resets any previous value, then move-constructs `string` into the union
  // and tags the host as H_DOMAIN.
  inline void SetDomain(std::string&& string) {
117
2478
    Reset();
118
2478
    type_ = HostType::H_DOMAIN;
119
2478
    new(&value_.domain_or_opaque) std::string(std::move(string));
120
2478
  }
121
};
122
123
5852
// Releases the string stored in the union (if any) by delegating to Reset().
URLHost::~URLHost() {
124
2926
  Reset();
125
2926
}
126
127
// X-macro: invokes XX once for each ARG_* name, in order.
// ARG_COUNT is listed last so that, when the list is expanded into an enum,
// its value equals the number of entries before it.
#define ARGS(XX)                                                              \
128
  XX(ARG_FLAGS)                                                               \
129
  XX(ARG_PROTOCOL)                                                            \
130
  XX(ARG_USERNAME)                                                            \
131
  XX(ARG_PASSWORD)                                                            \
132
  XX(ARG_HOST)                                                                \
133
  XX(ARG_PORT)                                                                \
134
  XX(ARG_PATH)                                                                \
135
  XX(ARG_QUERY)                                                               \
136
  XX(ARG_FRAGMENT)                                                            \
137
  XX(ARG_COUNT)  // This one has to be last.
138
139
// X-macro: invokes XX once for each ERR_ARG_* name, in order.
#define ERR_ARGS(XX)                                                          \
140
  XX(ERR_ARG_FLAGS)                                                           \
141
  XX(ERR_ARG_INPUT)                                                           \
142
143
// Enumerators are generated from the ARGS list, so they are numbered from 0
// in the order the list declares them.
enum url_cb_args {
144
#define XX(name) name,
145
  ARGS(XX)
146
#undef XX
147
};
148
149
// Enumerators are generated from the ERR_ARGS list, so they are numbered
// from 0 in the order the list declares them.
enum url_error_cb_args {
150
#define XX(name) name,
151
  ERR_ARGS(XX)
152
#undef XX
153
};
154
155
// Defines `template <typename T> inline bool name(const T ch)` whose body
// returns `expr` (which may refer to `ch`).
// The static_assert rejects character types narrower than `bits` / 8 bytes.
// NOTE(review): the check is `>=`, so a type exactly `bits` wide is accepted
// even though the message says "wider than".
#define CHAR_TEST(bits, name, expr)                                           \
156
  template <typename T>                                                       \
157
  inline bool name(const T ch) {                                              \
158
    static_assert(sizeof(ch) >= (bits) / 8,                                   \
159
                  "Character must be wider than " #bits " bits");             \
160
    return (expr);                                                            \
161
  }
162
163
// Defines two overloads of `name`:
//   - name(ch1, ch2): returns `expr` (which may refer to `ch1` and `ch2`);
//   - name(const std::basic_string<T>& str): true only when `str` has at
//     least two characters and name(str[0], str[1]) is true.
// Both overloads static_assert that the character type is at least
// `bits` / 8 bytes wide.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
164
  template <typename T>                                                       \
165
  inline bool name(const T ch1, const T ch2) {                                \
166
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
167
                  "Character must be wider than " #bits " bits");             \
168
    return (expr);                                                            \
169
  }                                                                           \
170
  template <typename T>                                                       \
171
  inline bool name(const std::basic_string<T>& str) {                         \
172
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
173
                  "Character must be wider than " #bits " bits");             \
174
    return str.length() >= 2 && name(str[0], str[1]);                         \
175
  }
176
177
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
178

20939503
// True only for tab, line feed and carriage return.
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
179
180
// https://infra.spec.whatwg.org/#c0-control-or-space
181

332497
// True for every value from '\0' through ' ' inclusive.
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
182
183
// https://infra.spec.whatwg.org/#ascii-digit
184

662797
// True only for the characters '0' through '9'.
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))
185
186
// https://infra.spec.whatwg.org/#ascii-hex-digit
187


918
// True for '0'-'9', 'A'-'F' and 'a'-'f'.
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
188
                               (ch >= 'A' && ch <= 'F') ||
189
                               (ch >= 'a' && ch <= 'f')))
190
191
// https://infra.spec.whatwg.org/#ascii-alpha
192


1479521
// True for 'A'-'Z' and 'a'-'z'.
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
193
                            (ch >= 'a' && ch <= 'z')))
194
195
// https://infra.spec.whatwg.org/#ascii-alphanumeric
196

655112
// True when either IsASCIIDigit() or IsASCIIAlpha() accepts `ch`.
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))
197
198
// https://infra.spec.whatwg.org/#ascii-lowercase
199
// Returns `ch` with bit 0x20 set when `ch` is an ASCII letter (so uppercase
// letters become lowercase and lowercase letters are unchanged); any other
// value is returned as-is.
template <typename T>
200
655149
inline T ASCIILowercase(T ch) {
201
655149
  return IsASCIIAlpha(ch) ? (ch | 0x20) : ch;
202
}
203
204
// https://url.spec.whatwg.org/#forbidden-host-code-point
205







72499
// True for NUL, tab, line feed, carriage return, space, and the characters
// '#', '%', '/', ':', '?', '@', '[', backslash and ']'.
CHAR_TEST(8, IsForbiddenHostCodePoint,
206
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
207
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
208
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
209
          ch == '\\' || ch == ']')
210
211
// https://url.spec.whatwg.org/#windows-drive-letter
212


2298
// True when the first character is an ASCII letter and the second is ':'
// or '|'.
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
213
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))
214
215
// https://url.spec.whatwg.org/#normalized-windows-drive-letter
216


940
// True when the first character is an ASCII letter and the second is ':'.
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
217
                     (IsASCIIAlpha(ch1) && ch2 == ':'))
218
219
// If a UTF-16 character is a low/trailing surrogate.
220
1
// For a 16-bit code unit this matches the range 0xDC00-0xDFFF.
CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00)
221
222
// If a UTF-16 character is a surrogate.
223
21
// For a 16-bit code unit this matches the range 0xD800-0xDFFF.
CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800)
224
225
// If a UTF-16 surrogate is a low/trailing one.
226
13
// Only tests bit 0x400, so the answer is meaningful only when `ch` is
// already known to be a surrogate.
CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)
227
228
#undef CHAR_TEST
229
#undef TWO_CHAR_STRING_TEST
230
231
// Percent-encoded spelling of every byte value: entry N is '%' followed by
// the two uppercase hexadecimal digits of N (0x00-0xFF).
// NOTE(review): the array elements themselves are non-const pointers.
const char* hex[256] = {
232
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
233
  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
234
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
235
  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
236
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
237
  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
238
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
239
  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
240
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
241
  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
242
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
243
  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
244
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
245
  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
246
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
247
  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
248
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
249
  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
250
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
251
  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
252
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
253
  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
254
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
255
  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
256
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
257
  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
258
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
259
  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
260
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
261
  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
262
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
263
  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
264
};
265
266
// 256-bit membership table, one bit per byte value: each array element
// covers eight consecutive byte values (named in the row comment), lowest
// value in bit 0x01 and highest in bit 0x80.
// Bits set here: 0x00-0x1F and 0x7F-0xFF.
// NOTE(review): presumably a set bit means "percent-encode this byte"; the
// lookup code is outside this view.
const uint8_t C0_CONTROL_ENCODE_SET[32] = {
267
  // 00     01     02     03     04     05     06     07
268
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
269
  // 08     09     0A     0B     0C     0D     0E     0F
270
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
271
  // 10     11     12     13     14     15     16     17
272
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
273
  // 18     19     1A     1B     1C     1D     1E     1F
274
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
275
  // 20     21     22     23     24     25     26     27
276
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
277
  // 28     29     2A     2B     2C     2D     2E     2F
278
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
279
  // 30     31     32     33     34     35     36     37
280
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
281
  // 38     39     3A     3B     3C     3D     3E     3F
282
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
283
  // 40     41     42     43     44     45     46     47
284
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
285
  // 48     49     4A     4B     4C     4D     4E     4F
286
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
287
  // 50     51     52     53     54     55     56     57
288
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
289
  // 58     59     5A     5B     5C     5D     5E     5F
290
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
291
  // 60     61     62     63     64     65     66     67
292
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
293
  // 68     69     6A     6B     6C     6D     6E     6F
294
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
295
  // 70     71     72     73     74     75     76     77
296
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
297
  // 78     79     7A     7B     7C     7D     7E     7F
298
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
299
  // 80     81     82     83     84     85     86     87
300
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
301
  // 88     89     8A     8B     8C     8D     8E     8F
302
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
303
  // 90     91     92     93     94     95     96     97
304
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
305
  // 98     99     9A     9B     9C     9D     9E     9F
306
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
307
  // A0     A1     A2     A3     A4     A5     A6     A7
308
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
309
  // A8     A9     AA     AB     AC     AD     AE     AF
310
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
311
  // B0     B1     B2     B3     B4     B5     B6     B7
312
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
313
  // B8     B9     BA     BB     BC     BD     BE     BF
314
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
315
  // C0     C1     C2     C3     C4     C5     C6     C7
316
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
317
  // C8     C9     CA     CB     CC     CD     CE     CF
318
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
319
  // D0     D1     D2     D3     D4     D5     D6     D7
320
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
321
  // D8     D9     DA     DB     DC     DD     DE     DF
322
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
323
  // E0     E1     E2     E3     E4     E5     E6     E7
324
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
325
  // E8     E9     EA     EB     EC     ED     EE     EF
326
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
327
  // F0     F1     F2     F3     F4     F5     F6     F7
328
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
329
  // F8     F9     FA     FB     FC     FD     FE     FF
330
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
331
};
332
333
// 256-bit membership table, one bit per byte value: each array element
// covers eight consecutive byte values (named in the row comment), lowest
// value in bit 0x01 and highest in bit 0x80.
// Bits set here: 0x00-0x1F, 0x20 (space), 0x22 ("), 0x3C (<), 0x3E (>),
// 0x60 (`) and 0x7F-0xFF.
// NOTE(review): presumably a set bit means "percent-encode this byte"; the
// lookup code is outside this view.
const uint8_t FRAGMENT_ENCODE_SET[32] = {
334
  // 00     01     02     03     04     05     06     07
335
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
336
  // 08     09     0A     0B     0C     0D     0E     0F
337
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
338
  // 10     11     12     13     14     15     16     17
339
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
340
  // 18     19     1A     1B     1C     1D     1E     1F
341
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
342
  // 20     21     22     23     24     25     26     27
343
    0x01 | 0x00 | 0x04 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
344
  // 28     29     2A     2B     2C     2D     2E     2F
345
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
346
  // 30     31     32     33     34     35     36     37
347
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
348
  // 38     39     3A     3B     3C     3D     3E     3F
349
    0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00,
350
  // 40     41     42     43     44     45     46     47
351
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
352
  // 48     49     4A     4B     4C     4D     4E     4F
353
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
354
  // 50     51     52     53     54     55     56     57
355
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
356
  // 58     59     5A     5B     5C     5D     5E     5F
357
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
358
  // 60     61     62     63     64     65     66     67
359
    0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
360
  // 68     69     6A     6B     6C     6D     6E     6F
361
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
362
  // 70     71     72     73     74     75     76     77
363
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
364
  // 78     79     7A     7B     7C     7D     7E     7F
365
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
366
  // 80     81     82     83     84     85     86     87
367
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
368
  // 88     89     8A     8B     8C     8D     8E     8F
369
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
370
  // 90     91     92     93     94     95     96     97
371
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
372
  // 98     99     9A     9B     9C     9D     9E     9F
373
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
374
  // A0     A1     A2     A3     A4     A5     A6     A7
375
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
376
  // A8     A9     AA     AB     AC     AD     AE     AF
377
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
378
  // B0     B1     B2     B3     B4     B5     B6     B7
379
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
380
  // B8     B9     BA     BB     BC     BD     BE     BF
381
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
382
  // C0     C1     C2     C3     C4     C5     C6     C7
383
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
384
  // C8     C9     CA     CB     CC     CD     CE     CF
385
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
386
  // D0     D1     D2     D3     D4     D5     D6     D7
387
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
388
  // D8     D9     DA     DB     DC     DD     DE     DF
389
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
390
  // E0     E1     E2     E3     E4     E5     E6     E7
391
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
392
  // E8     E9     EA     EB     EC     ED     EE     EF
393
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
394
  // F0     F1     F2     F3     F4     F5     F6     F7
395
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
396
  // F8     F9     FA     FB     FC     FD     FE     FF
397
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
398
};
399
400
401
// 256-bit membership table, one bit per byte value: each array element
// covers eight consecutive byte values (named in the row comment), lowest
// value in bit 0x01 and highest in bit 0x80.
// Bits set here: 0x00-0x1F, 0x20 (space), 0x22 ("), 0x23 (#), 0x3C (<),
// 0x3E (>), 0x3F (?), 0x60 (`), 0x7B ({), 0x7D (}) and 0x7F-0xFF.
// NOTE(review): presumably a set bit means "percent-encode this byte"; the
// lookup code is outside this view.
const uint8_t PATH_ENCODE_SET[32] = {
402
  // 00     01     02     03     04     05     06     07
403
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
404
  // 08     09     0A     0B     0C     0D     0E     0F
405
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
406
  // 10     11     12     13     14     15     16     17
407
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
408
  // 18     19     1A     1B     1C     1D     1E     1F
409
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
410
  // 20     21     22     23     24     25     26     27
411
    0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00,
412
  // 28     29     2A     2B     2C     2D     2E     2F
413
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
414
  // 30     31     32     33     34     35     36     37
415
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
416
  // 38     39     3A     3B     3C     3D     3E     3F
417
    0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80,
418
  // 40     41     42     43     44     45     46     47
419
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
420
  // 48     49     4A     4B     4C     4D     4E     4F
421
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
422
  // 50     51     52     53     54     55     56     57
423
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
424
  // 58     59     5A     5B     5C     5D     5E     5F
425
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
426
  // 60     61     62     63     64     65     66     67
427
    0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
428
  // 68     69     6A     6B     6C     6D     6E     6F
429
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
430
  // 70     71     72     73     74     75     76     77
431
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
432
  // 78     79     7A     7B     7C     7D     7E     7F
433
    0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80,
434
  // 80     81     82     83     84     85     86     87
435
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
436
  // 88     89     8A     8B     8C     8D     8E     8F
437
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
438
  // 90     91     92     93     94     95     96     97
439
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
440
  // 98     99     9A     9B     9C     9D     9E     9F
441
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
442
  // A0     A1     A2     A3     A4     A5     A6     A7
443
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
444
  // A8     A9     AA     AB     AC     AD     AE     AF
445
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
446
  // B0     B1     B2     B3     B4     B5     B6     B7
447
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
448
  // B8     B9     BA     BB     BC     BD     BE     BF
449
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
450
  // C0     C1     C2     C3     C4     C5     C6     C7
451
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
452
  // C8     C9     CA     CB     CC     CD     CE     CF
453
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
454
  // D0     D1     D2     D3     D4     D5     D6     D7
455
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
456
  // D8     D9     DA     DB     DC     DD     DE     DF
457
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
458
  // E0     E1     E2     E3     E4     E5     E6     E7
459
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
460
  // E8     E9     EA     EB     EC     ED     EE     EF
461
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
462
  // F0     F1     F2     F3     F4     F5     F6     F7
463
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
464
  // F8     F9     FA     FB     FC     FD     FE     FF
465
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
466
};
467
468
// 256-bit membership table, one bit per byte value: each array element
// covers eight consecutive byte values (named in the row comment), lowest
// value in bit 0x01 and highest in bit 0x80.
// Bits set here: 0x00-0x1F, 0x20 (space), 0x22 ("), 0x23 (#), 0x2F (/),
// 0x3A-0x3F (: ; < = > ?), 0x40 (@), 0x5B-0x5E ([ backslash ] ^), 0x60 (`),
// 0x7B-0x7D ({ | }) and 0x7F-0xFF.
// NOTE(review): presumably a set bit means "percent-encode this byte"; the
// lookup code is outside this view.
const uint8_t USERINFO_ENCODE_SET[32] = {
469
  // 00     01     02     03     04     05     06     07
470
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
471
  // 08     09     0A     0B     0C     0D     0E     0F
472
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
473
  // 10     11     12     13     14     15     16     17
474
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
475
  // 18     19     1A     1B     1C     1D     1E     1F
476
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
477
  // 20     21     22     23     24     25     26     27
478
    0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00,
479
  // 28     29     2A     2B     2C     2D     2E     2F
480
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
481
  // 30     31     32     33     34     35     36     37
482
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
483
  // 38     39     3A     3B     3C     3D     3E     3F
484
    0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
485
  // 40     41     42     43     44     45     46     47
486
    0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
487
  // 48     49     4A     4B     4C     4D     4E     4F
488
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
489
  // 50     51     52     53     54     55     56     57
490
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
491
  // 58     59     5A     5B     5C     5D     5E     5F
492
    0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00,
493
  // 60     61     62     63     64     65     66     67
494
    0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
495
  // 68     69     6A     6B     6C     6D     6E     6F
496
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
497
  // 70     71     72     73     74     75     76     77
498
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
499
  // 78     79     7A     7B     7C     7D     7E     7F
500
    0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80,
501
  // 80     81     82     83     84     85     86     87
502
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
503
  // 88     89     8A     8B     8C     8D     8E     8F
504
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
505
  // 90     91     92     93     94     95     96     97
506
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
507
  // 98     99     9A     9B     9C     9D     9E     9F
508
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
509
  // A0     A1     A2     A3     A4     A5     A6     A7
510
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
511
  // A8     A9     AA     AB     AC     AD     AE     AF
512
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
513
  // B0     B1     B2     B3     B4     B5     B6     B7
514
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
515
  // B8     B9     BA     BB     BC     BD     BE     BF
516
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
517
  // C0     C1     C2     C3     C4     C5     C6     C7
518
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
519
  // C8     C9     CA     CB     CC     CD     CE     CF
520
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
521
  // D0     D1     D2     D3     D4     D5     D6     D7
522
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
523
  // D8     D9     DA     DB     DC     DD     DE     DF
524
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
525
  // E0     E1     E2     E3     E4     E5     E6     E7
526
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
527
  // E8     E9     EA     EB     EC     ED     EE     EF
528
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
529
  // F0     F1     F2     F3     F4     F5     F6     F7
530
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
531
  // F8     F9     FA     FB     FC     FD     FE     FF
532
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
533
};
534
535
// 256-bit membership table, one bit per byte value: each array element
// covers eight consecutive byte values (named in the row comment), lowest
// value in bit 0x01 and highest in bit 0x80.
// Bits set here: 0x00-0x1F, 0x20 (space), 0x22 ("), 0x23 (#), 0x3C (<),
// 0x3E (>) and 0x7F-0xFF.
// NOTE(review): presumably a set bit means "percent-encode this byte"; the
// lookup code is outside this view.
const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32] = {
536
  // 00     01     02     03     04     05     06     07
537
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
538
  // 08     09     0A     0B     0C     0D     0E     0F
539
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
540
  // 10     11     12     13     14     15     16     17
541
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
542
  // 18     19     1A     1B     1C     1D     1E     1F
543
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
544
  // 20     21     22     23     24     25     26     27
545
    0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00,
546
  // 28     29     2A     2B     2C     2D     2E     2F
547
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
548
  // 30     31     32     33     34     35     36     37
549
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
550
  // 38     39     3A     3B     3C     3D     3E     3F
551
    0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00,
552
  // 40     41     42     43     44     45     46     47
553
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
554
  // 48     49     4A     4B     4C     4D     4E     4F
555
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
556
  // 50     51     52     53     54     55     56     57
557
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
558
  // 58     59     5A     5B     5C     5D     5E     5F
559
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
560
  // 60     61     62     63     64     65     66     67
561
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
562
  // 68     69     6A     6B     6C     6D     6E     6F
563
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
564
  // 70     71     72     73     74     75     76     77
565
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
566
  // 78     79     7A     7B     7C     7D     7E     7F
567
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
568
  // 80     81     82     83     84     85     86     87
569
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
570
  // 88     89     8A     8B     8C     8D     8E     8F
571
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
572
  // 90     91     92     93     94     95     96     97
573
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
574
  // 98     99     9A     9B     9C     9D     9E     9F
575
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
576
  // A0     A1     A2     A3     A4     A5     A6     A7
577
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
578
  // A8     A9     AA     AB     AC     AD     AE     AF
579
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
580
  // B0     B1     B2     B3     B4     B5     B6     B7
581
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
582
  // B8     B9     BA     BB     BC     BD     BE     BF
583
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
584
  // C0     C1     C2     C3     C4     C5     C6     C7
585
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
586
  // C8     C9     CA     CB     CC     CD     CE     CF
587
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
588
  // D0     D1     D2     D3     D4     D5     D6     D7
589
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
590
  // D8     D9     DA     DB     DC     DD     DE     DF
591
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
592
  // E0     E1     E2     E3     E4     E5     E6     E7
593
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
594
  // E8     E9     EA     EB     EC     ED     EE     EF
595
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
596
  // F0     F1     F2     F3     F4     F5     F6     F7
597
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
598
  // F8     F9     FA     FB     FC     FD     FE     FF
599
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
600
};
601
602
// Same as QUERY_ENCODE_SET_NONSPECIAL, but with 0x27 (') encoded.
//
// 256-bit bitmap, one bit per byte value: entry N covers byte values
// 8*N .. 8*N+7, least-significant bit first. A set bit marks a byte that is
// escaped.
const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = {
  // 0x00 - 0x1F: every byte in this range is encoded.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20 - 0x27: 0x20 (space), 0x22 ("), 0x23 (#) and 0x27 (').
  0x01 | 0x04 | 0x08 | 0x80,
  // 0x28 - 0x37: nothing encoded.
  0x00, 0x00,
  // 0x38 - 0x3F: 0x3C (<) and 0x3E (>).
  0x10 | 0x40,
  // 0x40 - 0x77: nothing encoded.
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  // 0x78 - 0x7F: 0x7F only.
  0x80,
  // 0x80 - 0xFF: every byte in this range is encoded.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
669
670
17828981
// Reports whether bit number `i` is set in the bitmap `a`. Each byte of `a`
// holds eight consecutive bits, least-significant bit first.
inline bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i >> 3];
  const int mask = 1 << (i & 7);
  return (byte & mask) != 0;
}
673
674
// Appends ch to str. If ch position in encode_set is set, the ch will
675
// be percent-encoded then appended.
676
17828981
inline void AppendOrEscape(std::string* str,
677
                           const unsigned char ch,
678
                           const uint8_t encode_set[]) {
679
17828981
  if (BitAt(encode_set, ch))
680
563
    *str += hex[ch];
681
  else
682
17828419
    *str += ch;
683
17828981
}
684
685
// Converts one ASCII hexadecimal digit (either case) to its numeric value.
// Any other character yields static_cast<unsigned>(-1).
template <typename T>
inline unsigned hex2bin(const T ch) {
  if ('0' <= ch && ch <= '9') {
    return ch - '0';
  } else if ('A' <= ch && ch <= 'F') {
    return 10 + (ch - 'A');
  } else if ('a' <= ch && ch <= 'f') {
    return 10 + (ch - 'a');
  }
  return static_cast<unsigned>(-1);
}
695
696
16197
inline std::string PercentDecode(const char* input, size_t len) {
697
16197
  std::string dest;
698
16197
  if (len == 0)
699
9
    return dest;
700
16188
  dest.reserve(len);
701
16188
  const char* pointer = input;
702
16188
  const char* end = input + len;
703
704
237110
  while (pointer < end) {
705
204734
    const char ch = pointer[0];
706
204734
    const size_t remaining = end - pointer - 1;
707


409281
    if (ch != '%' || remaining < 2 ||
708
196
        (ch == '%' &&
709
389
         (!IsASCIIHexDigit(pointer[1]) ||
710
193
          !IsASCIIHexDigit(pointer[2])))) {
711
204547
      dest += ch;
712
204547
      pointer++;
713
204547
      continue;
714
    } else {
715
187
      unsigned a = hex2bin(pointer[1]);
716
187
      unsigned b = hex2bin(pointer[2]);
717
187
      char c = static_cast<char>(a * 16 + b);
718
187
      dest += c;
719
187
      pointer += 3;
720
    }
721
  }
722
16188
  return dest;
723
}
724
725
// X-macro listing the special schemes (with their trailing ':') together
// with the port number associated with each one; "file:" is listed with -1.
// V is invoked once per entry as V(scheme_literal, port).
#define SPECIALS(V)                                                           \
  V("ftp:", 21)                                                               \
  V("file:", -1)                                                              \
  V("gopher:", 70)                                                            \
  V("http:", 80)                                                              \
  V("https:", 443)                                                            \
  V("ws:", 80)                                                                \
  V("wss:", 443)
733
734
406390
inline bool IsSpecial(const std::string& scheme) {
735
#define XX(name, _) if (scheme == name) return true;
736



406390
  SPECIALS(XX);
737
#undef XX
738
1057
  return false;
739
}
740
741
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
742
2226
inline bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
743
2226
  const size_t length = end - p;
744
2214
  return length >= 2 &&
745

2255
    IsWindowsDriveLetter(p[0], p[1]) &&
746
14
    (length == 2 ||
747
21
      p[2] == '/' ||
748
10
      p[2] == '\\' ||
749
5
      p[2] == '?' ||
750
2228
      p[2] == '#');
751
}
752
753
163588
inline int NormalizePort(const std::string& scheme, int p) {
754
#define XX(name, port) if (scheme == name && p == port) return -1;
755










163588
  SPECIALS(XX);
756
#undef XX
757
3699
  return p;
758
}
759
760
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs `input` through i18n::ToUnicode. On success stores the converted text
// in `*output` and returns true; a negative result from the conversion
// yields false and leaves `*output` untouched.
inline bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool ok =
      i18n::ToUnicode(&converted, input.c_str(), input.length()) >= 0;
  if (ok)
    output->assign(*converted, converted.length());
  return ok;
}

// Runs `input` through i18n::ToASCII. On success stores the converted text
// in `*output` and returns true; a negative result from the conversion
// yields false and leaves `*output` untouched.
inline bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool ok =
      i18n::ToASCII(&converted, input.c_str(), input.length()) >= 0;
  if (ok)
    output->assign(*converted, converted.length());
  return ok;
}
#else
// Without ICU these are intentional pass-throughs: the input is copied to
// the output as-is and the call always succeeds.
inline bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}

inline bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
#endif
788
789
91
void URLHost::ParseIPv6Host(const char* input, size_t length) {
790
91
  CHECK_EQ(type_, HostType::H_FAILED);
791
91
  unsigned size = arraysize(value_.ipv6);
792
819
  for (unsigned n = 0; n < size; n++)
793
728
    value_.ipv6[n] = 0;
794
91
  uint16_t* piece_pointer = &value_.ipv6[0];
795
91
  uint16_t* const buffer_end = piece_pointer + size;
796
91
  uint16_t* compress_pointer = nullptr;
797
91
  const char* pointer = input;
798
91
  const char* end = pointer + length;
799
  unsigned value, len, numbers_seen;
800
91
  char ch = pointer < end ? pointer[0] : kEOL;
801
91
  if (ch == ':') {
802

33
    if (length < 2 || pointer[1] != ':')
803
3
      return;
804
30
    pointer += 2;
805
30
    ch = pointer < end ? pointer[0] : kEOL;
806
30
    piece_pointer++;
807
30
    compress_pointer = piece_pointer;
808
  }
809
334
  while (ch != kEOL) {
810
219
    if (piece_pointer >= buffer_end)
811
3
      return;
812
216
    if (ch == ':') {
813
17
      if (compress_pointer != nullptr)
814
3
        return;
815
14
      pointer++;
816
14
      ch = pointer < end ? pointer[0] : kEOL;
817
14
      piece_pointer++;
818
14
      compress_pointer = piece_pointer;
819
14
      continue;
820
    }
821
199
    value = 0;
822
199
    len = 0;
823

610
    while (len < 4 && IsASCIIHexDigit(ch)) {
824
212
      value = value * 0x10 + hex2bin(ch);
825
212
      pointer++;
826
212
      ch = pointer < end ? pointer[0] : kEOL;
827
212
      len++;
828
    }
829

199
    switch (ch) {
830
      case '.':
831
43
        if (len == 0)
832
3
          return;
833
40
        pointer -= len;
834
40
        ch = pointer < end ? pointer[0] : kEOL;
835
40
        if (piece_pointer > buffer_end - 2)
836
3
          return;
837
37
        numbers_seen = 0;
838
167
        while (ch != kEOL) {
839
123
          value = 0xffffffff;
840
123
          if (numbers_seen > 0) {
841

86
            if (ch == '.' && numbers_seen < 4) {
842
78
              pointer++;
843
78
              ch = pointer < end ? pointer[0] : kEOL;
844
            } else {
845
8
              return;
846
            }
847
          }
848
115
          if (!IsASCIIDigit(ch))
849
16
            return;
850
320
          while (IsASCIIDigit(ch)) {
851
128
            unsigned number = ch - '0';
852
128
            if (value == 0xffffffff) {
853
99
              value = number;
854
29
            } else if (value == 0) {
855
3
              return;
856
            } else {
857
26
              value = value * 10 + number;
858
            }
859
125
            if (value > 255)
860
3
              return;
861
122
            pointer++;
862
122
            ch = pointer < end ? pointer[0] : kEOL;
863
          }
864
93
          *piece_pointer = *piece_pointer * 0x100 + value;
865
93
          numbers_seen++;
866

93
          if (numbers_seen == 2 || numbers_seen == 4)
867
37
            piece_pointer++;
868
        }
869
7
        if (numbers_seen != 4)
870
3
          return;
871
4
        continue;
872
      case ':':
873
125
        pointer++;
874
125
        ch = pointer < end ? pointer[0] : kEOL;
875
125
        if (ch == kEOL)
876
3
          return;
877
122
        break;
878
      case kEOL:
879
18
        break;
880
      default:
881
13
        return;
882
    }
883
140
    *piece_pointer = value;
884
140
    piece_pointer++;
885
  }
886
887
27
  if (compress_pointer != nullptr) {
888
18
    unsigned swaps = piece_pointer - compress_pointer;
889
18
    piece_pointer = buffer_end - 1;
890

56
    while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
891
20
      uint16_t temp = *piece_pointer;
892
20
      uint16_t* swap_piece = compress_pointer + swaps - 1;
893
20
      *piece_pointer = *swap_piece;
894
20
      *swap_piece = temp;
895
20
       piece_pointer--;
896
20
       swaps--;
897
    }
898

9
  } else if (compress_pointer == nullptr &&
899
             piece_pointer != buffer_end) {
900
3
    return;
901
  }
902
24
  type_ = HostType::H_IPV6;
903
}
904
905
2730
inline int64_t ParseNumber(const char* start, const char* end) {
906
2730
  unsigned R = 10;
907

2730
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
908
26
    start += 2;
909
26
    R = 16;
910
  }
911
2730
  if (end - start == 0) {
912
4
    return 0;
913

2726
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
914
32
    start++;
915
32
    R = 8;
916
  }
917
2726
  const char* p = start;
918
919
6263
  while (p < end) {
920
3281
    const char ch = p[0];
921

3281
    switch (R) {
922
      case 8:
923

173
        if (ch < '0' || ch > '7')
924
19
          return -1;
925
154
        break;
926
      case 10:
927
2986
        if (!IsASCIIDigit(ch))
928
2449
          return -1;
929
537
        break;
930
      case 16:
931
122
        if (!IsASCIIHexDigit(ch))
932
2
          return -1;
933
120
        break;
934
    }
935
811
    p++;
936
  }
937
256
  return strtoll(start, nullptr, R);
938
}
939
940
2562
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
941
2562
  CHECK_EQ(type_, HostType::H_FAILED);
942
2562
  *is_ipv4 = false;
943
2562
  const char* pointer = input;
944
2562
  const char* mark = input;
945
2562
  const char* end = pointer + length;
946
2562
  int parts = 0;
947
2562
  uint32_t val = 0;
948
  uint64_t numbers[4];
949
2562
  int tooBigNumbers = 0;
950
2562
  if (length == 0)
951
2499
    return;
952
953
26602
  while (pointer <= end) {
954
23958
    const char ch = pointer < end ? pointer[0] : kEOL;
955
23958
    const int remaining = end - pointer - 1;
956

23958
    if (ch == '.' || ch == kEOL) {
957
2738
      if (++parts > static_cast<int>(arraysize(numbers)))
958
2
        return;
959
2736
      if (pointer == mark)
960
6
        return;
961
2730
      int64_t n = ParseNumber(mark, pointer);
962
2730
      if (n < 0)
963
2470
        return;
964
965
260
      if (n > 255) {
966
69
        tooBigNumbers++;
967
      }
968
260
      numbers[parts - 1] = n;
969
260
      mark = pointer + 1;
970

260
      if (ch == '.' && remaining == 0)
971
2
        break;
972
    }
973
21478
    pointer++;
974
  }
975
84
  CHECK_GT(parts, 0);
976
84
  *is_ipv4 = true;
977
978
  // If any but the last item in numbers is greater than 255, return failure.
979
  // If the last item in numbers is greater than or equal to
980
  // 256^(5 - the number of items in numbers), return failure.
981

165
  if (tooBigNumbers > 1 ||
982

203
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
983
78
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
984
21
    return;
985
  }
986
987
63
  type_ = HostType::H_IPV4;
988
63
  val = numbers[parts - 1];
989
181
  for (int n = 0; n < parts - 1; n++) {
990
118
    double b = 3 - n;
991
118
    val += numbers[n] * pow(256, b);
992
  }
993
994
63
  value_.ipv4 = val;
995
}
996
997
147
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
998
147
  CHECK_EQ(type_, HostType::H_FAILED);
999
147
  std::string output;
1000
147
  output.reserve(length);
1001
927
  for (size_t i = 0; i < length; i++) {
1002
800
    const char ch = input[i];
1003

800
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
1004
167
      return;
1005
    } else {
1006
780
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
1007
    }
1008
  }
1009
1010
127
  SetOpaque(std::move(output));
1011
}
1012
1013
2926
void URLHost::ParseHost(const char* input,
1014
                        size_t length,
1015
                        bool is_special,
1016
                        bool unicode) {
1017
2926
  CHECK_EQ(type_, HostType::H_FAILED);
1018
2926
  const char* pointer = input;
1019
1020
2926
  if (length == 0)
1021
449
    return;
1022
1023
2925
  if (pointer[0] == '[') {
1024
97
    if (pointer[length - 1] != ']')
1025
6
      return;
1026
91
    return ParseIPv6Host(++pointer, length - 2);
1027
  }
1028
1029
2828
  if (!is_special)
1030
147
    return ParseOpaqueHost(input, length);
1031
1032
  // First, we have to percent decode
1033
2681
  std::string decoded = PercentDecode(input, length);
1034
1035
  // Then we have to punycode toASCII
1036
2681
  if (!ToASCII(decoded, &decoded))
1037
69
    return;
1038
1039
  // If any of the following characters are still present, we have to fail
1040
74266
  for (size_t n = 0; n < decoded.size(); n++) {
1041
71704
    const char ch = decoded[n];
1042
71704
    if (IsForbiddenHostCodePoint(ch)) {
1043
50
      return;
1044
    }
1045
  }
1046
1047
  // Check to see if it's an IPv4 IP address
1048
  bool is_ipv4;
1049
2562
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
1050
2562
  if (is_ipv4)
1051
84
    return;
1052
1053
  // If the unicode flag is set, run the result through punycode ToUnicode
1054

2478
  if (unicode && !ToUnicode(decoded, &decoded))
1055
    return;
1056
1057
  // It's not an IPv4 or IPv6 address, it must be a domain
1058
2478
  SetDomain(std::move(decoded));
1059
}
1060
1061
// Locates the longest sequence of 0 segments in an IPv6 address
// in order to use the :: compression when serializing.
// Returns a pointer to the first element of the longest run of zero values
// in values[0 .. len), or nullptr when no run is longer than one element.
// Among runs of equal length the earliest one is returned.
template <typename T>
inline T* FindLongestZeroSequence(T* values, size_t len) {
  T* best = nullptr;
  T* run_start = nullptr;
  unsigned run_length = 0;
  unsigned best_length = 1;  // A run must be strictly longer than this.

  for (size_t i = 0; i < len; i++) {
    if (values[i] == 0) {
      if (run_start == nullptr)
        run_start = values + i;
      run_length++;
      continue;
    }
    // A non-zero value closes the current run, if any.
    if (run_length > best_length) {
      best_length = run_length;
      best = run_start;
    }
    run_length = 0;
    run_start = nullptr;
  }
  // Account for a run that extends to the end of the array.
  if (run_length > best_length)
    best = run_start;
  return best;
}
1091
1092
2692
std::string URLHost::ToStringMove() {
1093
2692
  std::string return_value;
1094
2692
  switch (type_) {
1095
    case HostType::H_DOMAIN:
1096
    case HostType::H_OPAQUE:
1097
2605
      return_value = std::move(value_.domain_or_opaque);
1098
2605
      break;
1099
    default:
1100
87
      return_value = ToString();
1101
87
      break;
1102
  }
1103
2692
  Reset();
1104
2692
  return return_value;
1105
}
1106
1107
87
std::string URLHost::ToString() const {
1108
87
  std::string dest;
1109

87
  switch (type_) {
1110
    case HostType::H_DOMAIN:
1111
    case HostType::H_OPAQUE:
1112
      return value_.domain_or_opaque;
1113
      break;
1114
    case HostType::H_IPV4: {
1115
63
      dest.reserve(15);
1116
63
      uint32_t value = value_.ipv4;
1117
315
      for (int n = 0; n < 4; n++) {
1118
        char buf[4];
1119
252
        snprintf(buf, sizeof(buf), "%d", value % 256);
1120
252
        dest.insert(0, buf);
1121
252
        if (n < 3)
1122
189
          dest.insert(0, 1, '.');
1123
252
        value /= 256;
1124
      }
1125
63
      break;
1126
    }
1127
    case HostType::H_IPV6: {
1128
24
      dest.reserve(41);
1129
24
      dest += '[';
1130
24
      const uint16_t* start = &value_.ipv6[0];
1131
      const uint16_t* compress_pointer =
1132
24
          FindLongestZeroSequence(start, 8);
1133
24
      bool ignore0 = false;
1134
216
      for (int n = 0; n <= 7; n++) {
1135
192
        const uint16_t* piece = &value_.ipv6[n];
1136

192
        if (ignore0 && *piece == 0)
1137
246
          continue;
1138
80
        else if (ignore0)
1139
19
          ignore0 = false;
1140
80
        if (compress_pointer == piece) {
1141
22
          dest += n == 0 ? "::" : ":";
1142
22
          ignore0 = true;
1143
22
          continue;
1144
        }
1145
        char buf[5];
1146
58
        snprintf(buf, sizeof(buf), "%x", *piece);
1147
58
        dest += buf;
1148
58
        if (n < 7)
1149
37
          dest += ':';
1150
      }
1151
24
      dest += ']';
1152
24
      break;
1153
    }
1154
    case HostType::H_FAILED:
1155
      break;
1156
  }
1157
87
  return dest;
1158
}
1159
1160
2543
bool ParseHost(const std::string& input,
1161
               std::string* output,
1162
               bool is_special,
1163
               bool unicode = false) {
1164
2543
  if (input.length() == 0) {
1165
42
    output->clear();
1166
42
    return true;
1167
  }
1168
2501
  URLHost host;
1169
2501
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
1170
2501
  if (host.ParsingFailed())
1171
215
    return false;
1172
2286
  *output = host.ToStringMove();
1173
2286
  return true;
1174
}
1175
1176
1034
inline std::vector<std::string> FromJSStringArray(Environment* env,
1177
                                                  Local<Array> array) {
1178
1034
  std::vector<std::string> vec;
1179
1034
  const int32_t len = array->Length();
1180
1034
  if (len == 0)
1181
8
    return vec;  // nothing to copy
1182
1026
  vec.reserve(len);
1183
2923
  for (int32_t n = 0; n < len; n++) {
1184
5691
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
1185
3794
    if (val->IsString()) {
1186
1897
      Utf8Value value(env->isolate(), val.As<String>());
1187
1897
      vec.emplace_back(*value, value.length());
1188
    }
1189
  }
1190
1026
  return vec;
1191
}
1192
1193
1034
// Builds a url_data from the properties of the JS object `base_obj`.
// Properties are read in this order: flags, scheme, username, password,
// host, query, fragment, port, path. String components set their
// URL_FLAGS_HAS_* bit; username and password only do so when non-empty.
inline url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  // Reads one property of base_obj.
  const auto get = [&](Local<String> key) -> Local<Value> {
    return base_obj->Get(context, key).ToLocalChecked();
  };

  Local<Value> flags = get(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  base.scheme = Utf8Value(env->isolate(), get(env->scheme_string())).out();

  // Copies the string property `name` into base.*member. The flag is set
  // for any string when `empty_as_present` is true, and only for non-empty
  // strings otherwise. Non-string properties are ignored.
  const auto read_string = [&](std::string url_data::*member,
                               int flag,
                               Local<String> name,
                               bool empty_as_present) {
    Local<Value> value = get(name);
    if (!value->IsString())
      return;
    Local<String> str = value.As<String>();
    Utf8Value utf8(env->isolate(), str);
    (base.*member).assign(*utf8, utf8.length());
    if (empty_as_present || str->Length() != 0)
      base.flags |= flag;
  };
  read_string(&url_data::username,
              URL_FLAGS_HAS_USERNAME,
              env->username_string(),
              false);
  read_string(&url_data::password,
              URL_FLAGS_HAS_PASSWORD,
              env->password_string(),
              false);
  read_string(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
  read_string(&url_data::query,
              URL_FLAGS_HAS_QUERY,
              env->query_string(),
              true);
  read_string(&url_data::fragment,
              URL_FLAGS_HAS_FRAGMENT,
              env->fragment_string(),
              true);

  Local<Value> port = get(env->port_string());
  if (port->IsInt32())
    base.port = port.As<Int32>()->Value();

  Local<Value> path = get(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
1246
1247
42146
// Builds a url_data from the JS object `context_obj`, copying only a subset
// of its state: the flag bits in copy_flags_mask, the scheme, the port, the
// host, and - when the corresponding flag bit is set - the username and the
// password (which must then be strings).
inline url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;

  // Reads one property of context_obj.
  const auto get = [&](Local<String> key) -> Local<Value> {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  // Stores the UTF-8 form of `value` in `*target`.
  const auto store = [&](Local<Value> value, std::string* target) {
    Utf8Value utf8(env->isolate(), value);
    target->assign(*utf8, utf8.length());
  };

  Local<Value> flags = get(env->flags_string());
  if (flags->IsInt32()) {
    static const int32_t copy_flags_mask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & copy_flags_mask;
  }

  Local<Value> scheme = get(env->scheme_string());
  if (scheme->IsString())
    store(scheme, &context.scheme);

  Local<Value> port = get(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = get(env->username_string());
    CHECK(username->IsString());
    store(username, &context.username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = get(env->password_string());
    CHECK(password->IsString());
    store(password, &context.password);
  }

  Local<Value> host = get(env->host_string());
  if (host->IsString())
    store(host, &context.host);

  return context;
}
1295
1296
// Single dot segment can be ".", "%2e", or "%2E"
1297
3929401
inline bool IsSingleDotSegment(const std::string& str) {
1298
3929401
  switch (str.size()) {
1299
    case 1:
1300
1262
      return str == ".";
1301
    case 3:
1302
273068
      return str[0] == '%' &&
1303

273064
             str[1] == '2' &&
1304
273064
             ASCIILowercase(str[2]) == 'e';
1305
    default:
1306
3655101
      return false;
1307
  }
1308
}
1309
1310
// Double dot segment can be:
1311
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
1312
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
1313
1966592
inline bool IsDoubleDotSegment(const std::string& str) {
1314

1966592
  switch (str.size()) {
1315
    case 2:
1316
4163
      return str == "..";
1317
    case 4:
1318

413315
      if (str[0] != '.' && str[0] != '%')
1319
413302
        return false;
1320
23
      return ((str[0] == '.' &&
1321
12
               str[1] == '%' &&
1322
4
               str[2] == '2' &&
1323

31
               ASCIILowercase(str[3]) == 'e') ||
1324
14
              (str[0] == '%' &&
1325
6
               str[1] == '2' &&
1326
6
               ASCIILowercase(str[2]) == 'e' &&
1327
16
               str[3] == '.'));
1328
    case 6:
1329
69045
      return (str[0] == '%' &&
1330
8
              str[1] == '2' &&
1331
6
              ASCIILowercase(str[2]) == 'e' &&
1332
4
              str[3] == '%' &&
1333

69045
              str[4] == '2' &&
1334
69043
              ASCIILowercase(str[5]) == 'e');
1335
    default:
1336
1480073
      return false;
1337
  }
1338
}
1339
1340
4030
inline void ShortenUrlPath(struct url_data* url) {
1341
4030
  if (url->path.empty()) return;
1342


4162
  if (url->path.size() == 1 && url->scheme == "file:" &&
1343
339
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
1344
3823
  url->path.pop_back();
1345
}
1346
1347
}  // anonymous namespace
1348
1349
248075
void URL::Parse(const char* input,
1350
                size_t len,
1351
                enum url_parse_state state_override,
1352
                struct url_data* url,
1353
                bool has_url,
1354
                const struct url_data* base,
1355
                bool has_base) {
1356
248075
  const char* p = input;
1357
248075
  const char* end = input + len;
1358
1359
248075
  if (!has_url) {
1360
166548
    for (const char* ptr = p; ptr < end; ptr++) {
1361
166250
      if (IsC0ControlOrSpace(*ptr))
1362
28
        p++;
1363
      else
1364
166222
        break;
1365
    }
1366
166545
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
1367
166247
      if (IsC0ControlOrSpace(*ptr))
1368
25
        end--;
1369
      else
1370
166222
        break;
1371
    }
1372
166520
    input = p;
1373
166520
    len = end - p;
1374
  }
1375
1376
  // The spec says we should strip out any ASCII tabs or newlines.
1377
  // In those cases, we create another std::string instance with the filtered
1378
  // contents, but in the general case we avoid the overhead.
1379
248075
  std::string whitespace_stripped;
1380
21187260
  for (const char* ptr = p; ptr < end; ptr++) {
1381
20939207
    if (!IsASCIITabOrNewline(*ptr))
1382
20939185
      continue;
1383
    // Hit tab or newline. Allocate storage, copy what we have until now,
1384
    // and then iterate and filter all similar characters out.
1385
22
    whitespace_stripped.reserve(len - 1);
1386
22
    whitespace_stripped.assign(p, ptr - p);
1387
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
1388
318
    for (ptr = ptr + 1; ptr < end; ptr++) {
1389
296
      if (!IsASCIITabOrNewline(*ptr))
1390
254
        whitespace_stripped += *ptr;
1391
    }
1392
1393
    // Update variables like they should have looked like if the string
1394
    // had been stripped of whitespace to begin with.
1395
22
    input = whitespace_stripped.c_str();
1396
22
    len = whitespace_stripped.size();
1397
22
    p = input;
1398
22
    end = input + len;
1399
22
    break;
1400
  }
1401
1402
248075
  bool atflag = false;  // Set when @ has been seen.
1403
248075
  bool square_bracket_flag = false;  // Set inside of [...]
1404
248075
  bool password_token_seen_flag = false;  // Set after a : after an username.
1405
1406
494502
  std::string buffer;
1407
1408
  // Set the initial parse state.
1409
248075
  const bool has_state_override = state_override != kUnknownState;
1410
  enum url_parse_state state = has_state_override ? state_override :
1411
248075
                                                    kSchemeStart;
1412
1413

248075
  if (state < kSchemeStart || state > kFragment) {
1414
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1415
    return;
1416
  }
1417
1418

21975335
  while (p <= end) {
1419
21480833
    const char ch = p < end ? p[0] : kEOL;
1420
21480833
    bool special = (url->flags & URL_FLAGS_SPECIAL);
1421
    bool cannot_be_base;
1422

21480833
    const bool special_back_slash = (special && ch == '\\');
1423
1424





21480833
    switch (state) {
1425
      case kSchemeStart:
1426
166546
        if (IsASCIIAlpha(ch)) {
1427
163215
          buffer += ASCIILowercase(ch);
1428
163215
          state = kScheme;
1429
3331
        } else if (!has_state_override) {
1430
3328
          state = kNoScheme;
1431
3328
          continue;
1432
        } else {
1433
3
          url->flags |= URL_FLAGS_FAILED;
1434
3
          return;
1435
        }
1436
163215
        break;
1437
      case kScheme:
1438


655112
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1439
491897
          buffer += ASCIILowercase(ch);
1440

163215
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1441

162938
          if (has_state_override && buffer.size() == 0) {
1442
            url->flags |= URL_FLAGS_TERMINATED;
1443
            return;
1444
          }
1445
162938
          buffer += ':';
1446
1447
162938
          bool new_is_special = IsSpecial(buffer);
1448
1449
162938
          if (has_state_override) {
1450

45
            if ((special != new_is_special) ||
1451
14
                ((buffer == "file:") &&
1452
4
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1453
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1454
1
                  (url->port != -1)))) {
1455
13
              url->flags |= URL_FLAGS_TERMINATED;
1456
13
              return;
1457
            }
1458
1459
            // File scheme && (host == empty or null) check left to JS-land
1460
            // as it can be done before even entering C++ binding.
1461
          }
1462
1463
162925
          url->scheme = std::move(buffer);
1464
162925
          url->port = NormalizePort(url->scheme, url->port);
1465
162925
          if (new_is_special) {
1466
162098
            url->flags |= URL_FLAGS_SPECIAL;
1467
162098
            special = true;
1468
          } else {
1469
827
            url->flags &= ~URL_FLAGS_SPECIAL;
1470
827
            special = false;
1471
          }
1472
162925
          buffer.clear();
1473
162925
          if (has_state_override)
1474
8
            return;
1475
162917
          if (url->scheme == "file:") {
1476
159859
            state = kFile;
1477

5294
          } else if (special &&
1478

3573
                     has_base &&
1479
515
                     url->scheme == base->scheme) {
1480
178
            state = kSpecialRelativeOrAuthority;
1481
2880
          } else if (special) {
1482
2058
            state = kSpecialAuthoritySlashes;
1483
822
          } else if (p[1] == '/') {
1484
235
            state = kPathOrAuthority;
1485
235
            p++;
1486
          } else {
1487
587
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1488
587
            url->flags |= URL_FLAGS_HAS_PATH;
1489
587
            url->path.emplace_back("");
1490
587
            state = kCannotBeBase;
1491
162917
          }
1492
277
        } else if (!has_state_override) {
1493
275
          buffer.clear();
1494
275
          state = kNoScheme;
1495
275
          p = input;
1496
275
          continue;
1497
        } else {
1498
2
          url->flags |= URL_FLAGS_FAILED;
1499
2
          return;
1500
        }
1501
654814
        break;
1502
      case kNoScheme:
1503

3603
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1504

3603
        if (!has_base || (cannot_be_base && ch != '#')) {
1505
1203
          url->flags |= URL_FLAGS_FAILED;
1506
1203
          return;
1507

2400
        } else if (cannot_be_base && ch == '#') {
1508
14
          url->scheme = base->scheme;
1509
14
          if (IsSpecial(url->scheme)) {
1510
            url->flags |= URL_FLAGS_SPECIAL;
1511
            special = true;
1512
          } else {
1513
14
            url->flags &= ~URL_FLAGS_SPECIAL;
1514
14
            special = false;
1515
          }
1516
14
          if (base->flags & URL_FLAGS_HAS_PATH) {
1517
14
            url->flags |= URL_FLAGS_HAS_PATH;
1518
14
            url->path = base->path;
1519
          }
1520
14
          if (base->flags & URL_FLAGS_HAS_QUERY) {
1521
2
            url->flags |= URL_FLAGS_HAS_QUERY;
1522
2
            url->query = base->query;
1523
          }
1524
14
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1525
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1526
            url->fragment = base->fragment;
1527
          }
1528
14
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1529
14
          state = kFragment;
1530

4772
        } else if (has_base &&
1531
2386
                   base->scheme != "file:") {
1532
144
          state = kRelative;
1533
144
          continue;
1534
        } else {
1535
2242
          url->scheme = "file:";
1536
2242
          url->flags |= URL_FLAGS_SPECIAL;
1537
2242
          special = true;
1538
2242
          state = kFile;
1539
2242
          continue;
1540
        }
1541
14
        break;
1542
      case kSpecialRelativeOrAuthority:
1543

178
        if (ch == '/' && p[1] == '/') {
1544
162
          state = kSpecialAuthorityIgnoreSlashes;
1545
162
          p++;
1546
        } else {
1547
16
          state = kRelative;
1548
16
          continue;
1549
        }
1550
162
        break;
1551
      case kPathOrAuthority:
1552
235
        if (ch == '/') {
1553
181
          state = kAuthority;
1554
        } else {
1555
54
          state = kPath;
1556
54
          continue;
1557
        }
1558
181
        break;
1559
      case kRelative:
1560
160
        url->scheme = base->scheme;
1561
160
        if (IsSpecial(url->scheme)) {
1562
117
          url->flags |= URL_FLAGS_SPECIAL;
1563
117
          special = true;
1564
        } else {
1565
43
          url->flags &= ~URL_FLAGS_SPECIAL;
1566
43
          special = false;
1567
        }
1568

160
        switch (ch) {
1569
          case kEOL:
1570
8
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1571
2
              url->flags |= URL_FLAGS_HAS_USERNAME;
1572
2
              url->username = base->username;
1573
            }
1574
8
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1575
2
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1576
2
              url->password = base->password;
1577
            }
1578
8
            if (base->flags & URL_FLAGS_HAS_HOST) {
1579
8
              url->flags |= URL_FLAGS_HAS_HOST;
1580
8
              url->host = base->host;
1581
            }
1582
8
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1583
              url->flags |= URL_FLAGS_HAS_QUERY;
1584
              url->query = base->query;
1585
            }
1586
8
            if (base->flags & URL_FLAGS_HAS_PATH) {
1587
8
              url->flags |= URL_FLAGS_HAS_PATH;
1588
8
              url->path = base->path;
1589
            }
1590
8
            url->port = base->port;
1591
8
            break;
1592
          case '/':
1593
35
            state = kRelativeSlash;
1594
35
            break;
1595
          case '?':
1596
24
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1597
              url->flags |= URL_FLAGS_HAS_USERNAME;
1598
              url->username = base->username;
1599
            }
1600
24
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1601
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1602
              url->password = base->password;
1603
            }
1604
24
            if (base->flags & URL_FLAGS_HAS_HOST) {
1605
22
              url->flags |= URL_FLAGS_HAS_HOST;
1606
22
              url->host = base->host;
1607
            }
1608
24
            if (base->flags & URL_FLAGS_HAS_PATH) {
1609
24
              url->flags |= URL_FLAGS_HAS_PATH;
1610
24
              url->path = base->path;
1611
            }
1612
24
            url->port = base->port;
1613
24
            state = kQuery;
1614
24
            break;
1615
          case '#':
1616
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1617
              url->flags |= URL_FLAGS_HAS_USERNAME;
1618
              url->username = base->username;
1619
            }
1620
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1621
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1622
              url->password = base->password;
1623
            }
1624
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1625
16
              url->flags |= URL_FLAGS_HAS_HOST;
1626
16
              url->host = base->host;
1627
            }
1628
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1629
              url->flags |= URL_FLAGS_HAS_QUERY;
1630
              url->query = base->query;
1631
            }
1632
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1633
18
              url->flags |= URL_FLAGS_HAS_PATH;
1634
18
              url->path = base->path;
1635
            }
1636
18
            url->port = base->port;
1637
18
            state = kFragment;
1638
18
            break;
1639
          default:
1640
75
            if (special_back_slash) {
1641
4
              state = kRelativeSlash;
1642
            } else {
1643
71
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1644
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1645
1
                url->username = base->username;
1646
              }
1647
71
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1648
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1649
1
                url->password = base->password;
1650
              }
1651
71
              if (base->flags & URL_FLAGS_HAS_HOST) {
1652
65
                url->flags |= URL_FLAGS_HAS_HOST;
1653
65
                url->host = base->host;
1654
              }
1655
71
              if (base->flags & URL_FLAGS_HAS_PATH) {
1656
71
                url->flags |= URL_FLAGS_HAS_PATH;
1657
71
                url->path = base->path;
1658
71
                ShortenUrlPath(url);
1659
              }
1660
71
              url->port = base->port;
1661
71
              state = kPath;
1662
71
              continue;
1663
            }
1664
        }
1665
89
        break;
1666
      case kRelativeSlash:
1667


39
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1668
8
          state = kSpecialAuthorityIgnoreSlashes;
1669
31
        } else if (ch == '/') {
1670
3
          state = kAuthority;
1671
        } else {
1672
28
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1673
4
            url->flags |= URL_FLAGS_HAS_USERNAME;
1674
4
            url->username = base->username;
1675
          }
1676
28
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1677
2
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1678
2
            url->password = base->password;
1679
          }
1680
28
          if (base->flags & URL_FLAGS_HAS_HOST) {
1681
26
            url->flags |= URL_FLAGS_HAS_HOST;
1682
26
            url->host = base->host;
1683
          }
1684
28
          url->port = base->port;
1685
28
          state = kPath;
1686
28
          continue;
1687
        }
1688
11
        break;
1689
      case kSpecialAuthoritySlashes:
1690
2058
        state = kSpecialAuthorityIgnoreSlashes;
1691

2058
        if (ch == '/' && p[1] == '/') {
1692
1966
          p++;
1693
        } else {
1694
92
          continue;
1695
        }
1696
1966
        break;
1697
      case kSpecialAuthorityIgnoreSlashes:
1698

2274
        if (ch != '/' && ch != '\\') {
1699
2228
          state = kAuthority;
1700
2228
          continue;
1701
        }
1702
46
        break;
1703
      case kAuthority:
1704
63707
        if (ch == '@') {
1705
151
          if (atflag) {
1706
13
            buffer.reserve(buffer.size() + 3);
1707
13
            buffer.insert(0, "%40");
1708
          }
1709
151
          atflag = true;
1710
151
          const size_t blen = buffer.size();
1711

151
          if (blen > 0 && buffer[0] != ':') {
1712
93
            url->flags |= URL_FLAGS_HAS_USERNAME;
1713
          }
1714
717
          for (size_t n = 0; n < blen; n++) {
1715
566
            const char bch = buffer[n];
1716
566
            if (bch == ':') {
1717
87
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1718
87
              if (!password_token_seen_flag) {
1719
85
                password_token_seen_flag = true;
1720
85
                continue;
1721
              }
1722
            }
1723
481
            if (password_token_seen_flag) {
1724
189
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1725
            } else {
1726
292
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1727
            }
1728
          }
1729
151
          buffer.clear();
1730

63556
        } else if (ch == kEOL ||
1731
61179
                   ch == '/' ||
1732
61162
                   ch == '?' ||
1733
61152
                   ch == '#' ||
1734
                   special_back_slash) {
1735

2412
          if (atflag && buffer.size() == 0) {
1736
39
            url->flags |= URL_FLAGS_FAILED;
1737
39
            return;
1738
          }
1739
2373
          p -= buffer.size() + 1;
1740
2373
          buffer.clear();
1741
2373
          state = kHost;
1742
        } else {
1743
61144
          buffer += ch;
1744
        }
1745
63668
        break;
1746
      case kHost:
1747
      case kHostname:
1748

62624
        if (has_state_override && url->scheme == "file:") {
1749
6
          state = kFileHost;
1750
6
          continue;
1751

62618
        } else if (ch == ':' && !square_bracket_flag) {
1752
720
          if (buffer.size() == 0) {
1753
19
            url->flags |= URL_FLAGS_FAILED;
1754
19
            return;
1755
          }
1756
701
          url->flags |= URL_FLAGS_HAS_HOST;
1757
701
          if (!ParseHost(buffer, &url->host, special)) {
1758
3
            url->flags |= URL_FLAGS_FAILED;
1759
3
            return;
1760
          }
1761
698
          buffer.clear();
1762
698
          state = kPort;
1763
1394
          if (state_override == kHostname) {
1764
2
            return;
1765
          }
1766

61898
        } else if (ch == kEOL ||
1767
60149
                   ch == '/' ||
1768
60128
                   ch == '?' ||
1769
60114
                   ch == '#' ||
1770
                   special_back_slash) {
1771
1794
          p--;
1772

1794
          if (special && buffer.size() == 0) {
1773
11
            url->flags |= URL_FLAGS_FAILED;
1774
11
            return;
1775
          }
1776

1902
          if (has_state_override &&
1777

1805
              buffer.size() == 0 &&
1778

50
              ((url->username.size() > 0 || url->password.size() > 0) ||
1779
16
               url->port != -1)) {
1780
4
            url->flags |= URL_FLAGS_TERMINATED;
1781
4
            return;
1782
          }
1783
1779
          url->flags |= URL_FLAGS_HAS_HOST;
1784
1779
          if (!ParseHost(buffer, &url->host, special)) {
1785
198
            url->flags |= URL_FLAGS_FAILED;
1786
198
            return;
1787
          }
1788
1581
          buffer.clear();
1789
1581
          state = kPathStart;
1790
3083
          if (has_state_override) {
1791
79
            return;
1792
          }
1793
        } else {
1794
60104
          if (ch == '[')
1795
93
            square_bracket_flag = true;
1796
60104
          if (ch == ']')
1797
90
            square_bracket_flag = false;
1798
60104
          buffer += ch;
1799
        }
1800
62302
        break;
1801
      case kPort:
1802
3445
        if (IsASCIIDigit(ch)) {
1803
2732
          buffer += ch;
1804

713
        } else if (has_state_override ||
1805
375
                   ch == kEOL ||
1806
27
                   ch == '/' ||
1807
27
                   ch == '?' ||
1808
27
                   ch == '#' ||
1809
                   special_back_slash) {
1810
686
          if (buffer.size() > 0) {
1811
681
            unsigned port = 0;
1812
            // the condition port <= 0xffff prevents integer overflow
1813

3251
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1814
2570
              port = port * 10 + buffer[i] - '0';
1815
681
            if (port > 0xffff) {
1816
              // TODO(TimothyGu): This hack is currently needed for the host
1817
              // setter since it needs access to hostname if it is valid, and
1818
              // if the FAILED flag is set the entire response to JS layer
1819
              // will be empty.
1820
18
              if (state_override == kHost)
1821
1
                url->port = -1;
1822
              else
1823
17
                url->flags |= URL_FLAGS_FAILED;
1824
18
              return;
1825
            }
1826
            // the port is valid
1827
663
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1828
663
            if (url->port == -1)
1829
29
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1830
663
            buffer.clear();
1831
5
          } else if (has_state_override) {
1832
            // TODO(TimothyGu): Similar case as above.
1833
1
            if (state_override == kHost)
1834
1
              url->port = -1;
1835
            else
1836
              url->flags |= URL_FLAGS_TERMINATED;
1837
1
            return;
1838
          }
1839
667
          state = kPathStart;
1840
667
          continue;
1841
        } else {
1842
27
          url->flags |= URL_FLAGS_FAILED;
1843
27
          return;
1844
        }
1845
2732
        break;
1846
      case kFile:
1847
162101
        url->scheme = "file:";
1848

162101
        if (ch == '/' || ch == '\\') {
1849
159999
          state = kFileSlash;
1850

2102
        } else if (has_base && base->scheme == "file:") {
1851

2095
          switch (ch) {
1852
            case kEOL:
1853
6
              if (base->flags & URL_FLAGS_HAS_HOST) {
1854
6
                url->flags |= URL_FLAGS_HAS_HOST;
1855
6
                url->host = base->host;
1856
              }
1857
6
              if (base->flags & URL_FLAGS_HAS_PATH) {
1858
6
                url->flags |= URL_FLAGS_HAS_PATH;
1859
6
                url->path = base->path;
1860
              }
1861
6
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1862
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1863
2
                url->query = base->query;
1864
              }
1865
6
              break;
1866
            case '?':
1867
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1868
2
                url->flags |= URL_FLAGS_HAS_HOST;
1869
2
                url->host = base->host;
1870
              }
1871
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1872
2
                url->flags |= URL_FLAGS_HAS_PATH;
1873
2
                url->path = base->path;
1874
              }
1875
2
              url->flags |= URL_FLAGS_HAS_QUERY;
1876
2
              url->query.clear();
1877
2
              state = kQuery;
1878
2
              break;
1879
            case '#':
1880
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1881
2
                url->flags |= URL_FLAGS_HAS_HOST;
1882
2
                url->host = base->host;
1883
              }
1884
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1885
2
                url->flags |= URL_FLAGS_HAS_PATH;
1886
2
                url->path = base->path;
1887
              }
1888
2
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1889
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1890
2
                url->query = base->query;
1891
              }
1892
2
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1893
2
              url->fragment.clear();
1894
2
              state = kFragment;
1895
2
              break;
1896
            default:
1897
2085
              if (!StartsWithWindowsDriveLetter(p, end)) {
1898
2076
                if (base->flags & URL_FLAGS_HAS_HOST) {
1899
2076
                  url->flags |= URL_FLAGS_HAS_HOST;
1900
2076
                  url->host = base->host;
1901
                }
1902
2076
                if (base->flags & URL_FLAGS_HAS_PATH) {
1903
2076
                  url->flags |= URL_FLAGS_HAS_PATH;
1904
2076
                  url->path = base->path;
1905
                }
1906
2076
                ShortenUrlPath(url);
1907
              }
1908
2085
              state = kPath;
1909
2085
              continue;
1910
          }
1911
        } else {
1912
7
          state = kPath;
1913
7
          continue;
1914
        }
1915
160009
        break;
1916
      case kFileSlash:
1917

159999
        if (ch == '/' || ch == '\\') {
1918
159855
          state = kFileHost;
1919
        } else {
1920

288
          if (has_base &&
1921

285
              base->scheme == "file:" &&
1922
141
              !StartsWithWindowsDriveLetter(p, end)) {
1923
136
            if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1924
1
              url->flags |= URL_FLAGS_HAS_PATH;
1925
1
              url->path.push_back(base->path[0]);
1926
            } else {
1927
135
              if (base->flags & URL_FLAGS_HAS_HOST) {
1928
135
                url->flags |= URL_FLAGS_HAS_HOST;
1929
135
                url->host = base->host;
1930
              } else {
1931
                url->flags &= ~URL_FLAGS_HAS_HOST;
1932
                url->host.clear();
1933
              }
1934
            }
1935
          }
1936
144
          state = kPath;
1937
144
          continue;
1938
        }
1939
159855
        break;
1940
      case kFileHost:
1941

160252
        if (ch == kEOL ||
1942
396
            ch == '/' ||
1943
391
            ch == '\\' ||
1944
391
            ch == '?' ||
1945
            ch == '#') {
1946

479577
          if (!has_state_override &&
1947

159868
              buffer.size() == 2 &&
1948
7
              IsWindowsDriveLetter(buffer)) {
1949
4
            state = kPath;
1950
159857
          } else if (buffer.size() == 0) {
1951
159794
            url->flags |= URL_FLAGS_HAS_HOST;
1952
159794
            url->host.clear();
1953
159794
            if (has_state_override)
1954
2
              return;
1955
159792
            state = kPathStart;
1956
          } else {
1957
63
            std::string host;
1958
63
            if (!ParseHost(buffer, &host, special)) {
1959
14
              url->flags |= URL_FLAGS_FAILED;
1960
14
              return;
1961
            }
1962
49
            if (host == "localhost")
1963
11
              host.clear();
1964
49
            url->flags |= URL_FLAGS_HAS_HOST;
1965
49
            url->host = host;
1966
49
            if (has_state_override)
1967
2
              return;
1968
47
            buffer.clear();
1969
47
            state = kPathStart;
1970
          }
1971
159843
          continue;
1972
        } else {
1973
391
          buffer += ch;
1974
        }
1975
391
        break;
1976
      case kPathStart:
1977
243239
        if (IsSpecial(url->scheme)) {
1978
243079
          state = kPath;
1979

243079
          if (ch != '/' && ch != '\\') {
1980
81720
            continue;
1981
          }
1982

160
        } else if (!has_state_override && ch == '?') {
1983
3
          url->flags |= URL_FLAGS_HAS_QUERY;
1984
3
          url->query.clear();
1985
3
          state = kQuery;
1986

157
        } else if (!has_state_override && ch == '#') {
1987
3
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1988
3
          url->fragment.clear();
1989
3
          state = kFragment;
1990
154
        } else if (ch != kEOL) {
1991
121
          state = kPath;
1992
121
          if (ch != '/') {
1993
6
            continue;
1994
          }
1995
        }
1996
161513
        break;
1997
      case kPath:
1998

19758482
        if (ch == kEOL ||
1999
17792237
            ch == '/' ||
2000
17792204
            special_back_slash ||
2001

26592810
            (!has_state_override && (ch == '?' || ch == '#'))) {
2002
1966592
          if (IsDoubleDotSegment(buffer)) {
2003
1883
            ShortenUrlPath(url);
2004

1883
            if (ch != '/' && !special_back_slash) {
2005
18
              url->flags |= URL_FLAGS_HAS_PATH;
2006
18
              url->path.emplace_back("");
2007
            }
2008

3929742
          } else if (IsSingleDotSegment(buffer) &&
2009

1964726
                     ch != '/' && !special_back_slash) {
2010
16
            url->flags |= URL_FLAGS_HAS_PATH;
2011
16
            url->path.emplace_back("");
2012
1964693
          } else if (!IsSingleDotSegment(buffer)) {
2013

5887203
            if (url->scheme == "file:" &&
2014
2160573
                url->path.empty() &&
2015

2166558
                buffer.size() == 2 &&
2016
35
                IsWindowsDriveLetter(buffer)) {
2017

49
              if ((url->flags & URL_FLAGS_HAS_HOST) &&
2018
15
                  !url->host.empty()) {
2019
3
                url->host.clear();
2020
3
                url->flags |= URL_FLAGS_HAS_HOST;
2021
              }
2022
34
              buffer[1] = ':';
2023
            }
2024
1964384
            url->flags |= URL_FLAGS_HAS_PATH;
2025
1964384
            url->path.emplace_back(std::move(buffer));
2026
          }
2027
1966592
          buffer.clear();
2028

2209885
          if (url->scheme == "file:" &&
2029
1717273
              (ch == kEOL ||
2030
1717263
               ch == '?' ||
2031
               ch == '#')) {
2032

526056
            while (url->path.size() > 1 && url->path[0].length() == 0) {
2033
39470
              url->path.erase(url->path.begin());
2034
            }
2035
          }
2036
3933184
          if (ch == '?') {
2037
103
            url->flags |= URL_FLAGS_HAS_QUERY;
2038
103
            state = kQuery;
2039
1966489
          } else if (ch == '#') {
2040
211
            state = kFragment;
2041
          }
2042
        } else {
2043
17791890
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
2044
        }
2045
19758482
        break;
2046
      case kCannotBeBase:
2047
4910
        switch (ch) {
2048
          case '?':
2049
2
            state = kQuery;
2050
2
            break;
2051
          case '#':
2052
5
            state = kFragment;
2053
5
            break;
2054
          default:
2055
4903
            if (url->path.size() == 0)
2056
              url->path.emplace_back("");
2057

4903
            if (url->path.size() > 0 && ch != kEOL)
2058
4323
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
2059
        }
2060
4910
        break;
2061
      case kQuery:
2062

2930
        if (ch == kEOL || (!has_state_override && ch == '#')) {
2063
249
          url->flags |= URL_FLAGS_HAS_QUERY;
2064
249
          url->query = std::move(buffer);
2065
249
          buffer.clear();
2066
498
          if (ch == '#')
2067
50
            state = kFragment;
2068
        } else {
2069
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
2070
2681
                                                QUERY_ENCODE_SET_NONSPECIAL);
2071
        }
2072
2930
        break;
2073
      case kFragment:
2074
28939
        switch (ch) {
2075
          case kEOL:
2076
322
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
2077
322
            url->fragment = std::move(buffer);
2078
322
            break;
2079
          case 0:
2080
2
            break;
2081
          default:
2082
28615
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
2083
        }
2084
28939
        break;
2085
      default:
2086
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
2087
        return;
2088
    }
2089
2090
21226229
    p++;
2091
246427
  }
2092
}  // NOLINT(readability/fn_size)
2093
2094
165312
static inline void SetArgs(Environment* env,
2095
                           Local<Value> argv[ARG_COUNT],
2096
                           const struct url_data& url) {
2097
165312
  Isolate* isolate = env->isolate();
2098
330624
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2099
330624
  argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str());
2100
165312
  if (url.flags & URL_FLAGS_HAS_USERNAME)
2101
164
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
2102
165312
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
2103
152
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
2104
165312
  if (url.flags & URL_FLAGS_HAS_HOST)
2105
329220
    argv[ARG_HOST] = Utf8String(isolate, url.host);
2106
165312
  if (url.flags & URL_FLAGS_HAS_QUERY)
2107
510
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
2108
165312
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
2109
644
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
2110
165312
  if (url.port > -1)
2111
1294
    argv[ARG_PORT] = Integer::New(isolate, url.port);
2112
165312
  if (url.flags & URL_FLAGS_HAS_PATH)
2113
330088
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
2114
165312
}
2115
2116
166351
static void Parse(Environment* env,
2117
                  Local<Value> recv,
2118
                  const char* input,
2119
                  const size_t len,
2120
                  enum url_parse_state state_override,
2121
                  Local<Value> base_obj,
2122
                  Local<Value> context_obj,
2123
                  Local<Function> cb,
2124
                  Local<Value> error_cb) {
2125
166351
  Isolate* isolate = env->isolate();
2126
166351
  Local<Context> context = env->context();
2127
166351
  HandleScope handle_scope(isolate);
2128
166334
  Context::Scope context_scope(context);
2129
2130
166351
  const bool has_context = context_obj->IsObject();
2131
166351
  const bool has_base = base_obj->IsObject();
2132
2133
332685
  url_data base;
2134
332685
  url_data url;
2135
166351
  if (has_context)
2136
42146
    url = HarvestContext(env, context_obj.As<Object>());
2137
166351
  if (has_base)
2138
1034
    base = HarvestBase(env, base_obj.As<Object>());
2139
2140
166351
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
2141

166351
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
2142
42146
      ((state_override != kUnknownState) &&
2143
42146
       (url.flags & URL_FLAGS_TERMINATED)))
2144
166368
    return;
2145
2146
  // Define the return value placeholders
2147
  const Local<Value> undef = Undefined(isolate);
2148
  const Local<Value> null = Null(isolate);
2149
166334
  if (!(url.flags & URL_FLAGS_FAILED)) {
2150
    Local<Value> argv[] = {
2151
      undef,
2152
      undef,
2153
      undef,
2154
      undef,
2155
      null,  // host defaults to null
2156
      null,  // port defaults to null
2157
      undef,
2158
      null,  // query defaults to null
2159
      null,  // fragment defaults to null
2160
165118
    };
2161
165118
    SetArgs(env, argv, url);
2162
495354
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
2163
1216
  } else if (error_cb->IsFunction()) {
2164
1169
    Local<Value> argv[2] = { undef, undef };
2165
2338
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2166
    argv[ERR_ARG_INPUT] =
2167
      String::NewFromUtf8(env->isolate(),
2168
                          input,
2169
2338
                          NewStringType::kNormal).ToLocalChecked();
2170
4676
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
2171
3507
        .FromMaybe(Local<Value>());
2172
166334
  }
2173
}
2174
2175
166351
static void Parse(const FunctionCallbackInfo<Value>& args) {
2176
166351
  Environment* env = Environment::GetCurrent(args);
2177
166351
  CHECK_GE(args.Length(), 5);
2178
499053
  CHECK(args[0]->IsString());  // input
2179



797012
  CHECK(args[2]->IsUndefined() ||  // base context
2180
        args[2]->IsNull() ||
2181
        args[2]->IsObject());
2182



876134
  CHECK(args[3]->IsUndefined() ||  // context
2183
        args[3]->IsNull() ||
2184
        args[3]->IsObject());
2185
332702
  CHECK(args[4]->IsFunction());  // complete callback
2186


913814
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
2187
2188
166351
  Utf8Value input(env->isolate(), args[0]);
2189
166351
  enum url_parse_state state_override = kUnknownState;
2190
332702
  if (args[1]->IsNumber()) {
2191
    state_override = static_cast<enum url_parse_state>(
2192
665404
        args[1]->Uint32Value(env->context()).FromJust());
2193
  }
2194
2195
  Parse(env, args.This(),
2196
166351
        *input, input.length(),
2197
        state_override,
2198
        args[2],
2199
        args[3],
2200
        args[4].As<Function>(),
2201
499053
        args[5]);
2202
166351
}
2203
2204
22
static void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
2205
22
  Environment* env = Environment::GetCurrent(args);
2206
22
  CHECK_GE(args.Length(), 1);
2207
66
  CHECK(args[0]->IsString());
2208
22
  Utf8Value value(env->isolate(), args[0]);
2209
44
  std::string output;
2210
22
  const size_t len = value.length();
2211
22
  output.reserve(len);
2212
233
  for (size_t n = 0; n < len; n++) {
2213
211
    const char ch = (*value)[n];
2214
211
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
2215
  }
2216
  args.GetReturnValue().Set(
2217
      String::NewFromUtf8(env->isolate(),
2218
                          output.c_str(),
2219
88
                          NewStringType::kNormal).ToLocalChecked());
2220
22
}
2221
2222
11
static void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2223
11
  Environment* env = Environment::GetCurrent(args);
2224
11
  CHECK_GE(args.Length(), 2);
2225
33
  CHECK(args[0]->IsString());
2226
22
  CHECK(args[1]->IsNumber());
2227
2228
11
  TwoByteValue value(env->isolate(), args[0]);
2229
11
  const size_t n = value.length();
2230
2231
44
  const int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2232
11
  CHECK_GE(start, 0);
2233
2234
32
  for (size_t i = start; i < n; i++) {
2235
21
    char16_t c = value[i];
2236
21
    if (!IsUnicodeSurrogate(c)) {
2237
8
      continue;
2238

13
    } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) {
2239
12
      value[i] = kUnicodeReplacementCharacter;
2240
    } else {
2241
1
      char16_t d = value[i + 1];
2242
1
      if (IsUnicodeTrail(d)) {
2243
        i++;
2244
      } else {
2245
1
        value[i] = kUnicodeReplacementCharacter;
2246
      }
2247
    }
2248
  }
2249
2250
  args.GetReturnValue().Set(
2251
      String::NewFromTwoByte(env->isolate(),
2252
11
                             *value,
2253
                             NewStringType::kNormal,
2254
44
                             n).ToLocalChecked());
2255
11
}
2256
2257
223
static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2258
223
  Environment* env = Environment::GetCurrent(args);
2259
223
  CHECK_GE(args.Length(), 1);
2260
669
  CHECK(args[0]->IsString());
2261
223
  Utf8Value value(env->isolate(), args[0]);
2262
2263
436
  URLHost host;
2264
  // Assuming the host is used for a special scheme.
2265
223
  host.ParseHost(*value, value.length(), true);
2266
223
  if (host.ParsingFailed()) {
2267
30
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2268
233
    return;
2269
  }
2270
426
  std::string out = host.ToStringMove();
2271
  args.GetReturnValue().Set(
2272
      String::NewFromUtf8(env->isolate(),
2273
                          out.c_str(),
2274
852
                          NewStringType::kNormal).ToLocalChecked());
2275
}
2276
2277
202
static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2278
202
  Environment* env = Environment::GetCurrent(args);
2279
202
  CHECK_GE(args.Length(), 1);
2280
606
  CHECK(args[0]->IsString());
2281
202
  Utf8Value value(env->isolate(), args[0]);
2282
2283
395
  URLHost host;
2284
  // Assuming the host is used for a special scheme.
2285
202
  host.ParseHost(*value, value.length(), true, true);
2286
202
  if (host.ParsingFailed()) {
2287
27
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2288
211
    return;
2289
  }
2290
386
  std::string out = host.ToStringMove();
2291
  args.GetReturnValue().Set(
2292
      String::NewFromUtf8(env->isolate(),
2293
                          out.c_str(),
2294
772
                          NewStringType::kNormal).ToLocalChecked());
2295
}
2296
2297
2195
std::string URL::ToFilePath() const {
2298
2195
  if (context_.scheme != "file:") {
2299
2
    return "";
2300
  }
2301
2302
#ifdef _WIN32
2303
  const char* slash = "\\";
2304
  auto is_slash = [] (char ch) {
2305
    return ch == '/' || ch == '\\';
2306
  };
2307
#else
2308
2193
  const char* slash = "/";
2309
132712
  auto is_slash = [] (char ch) {
2310
    return ch == '/';
2311
132712
  };
2312

4386
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2313
2193
      context_.host.length() > 0) {
2314
    return "";
2315
  }
2316
#endif
2317
2193
  std::string decoded_path;
2318
15708
  for (const std::string& part : context_.path) {
2319
13516
    std::string decoded = PercentDecode(part.c_str(), part.length());
2320
146227
    for (char& ch : decoded) {
2321
132712
      if (is_slash(ch)) {
2322
1
        return "";
2323
      }
2324
    }
2325
13515
    decoded_path += slash + decoded;
2326
13515
  }
2327
2328
#ifdef _WIN32
2329
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2330
2331
  // If hostname is set, then we have a UNC path. Pass the hostname through
2332
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
2333
  // need to worry about percent encoding because the URL parser will have
2334
  // already taken care of that for us. Note that this only causes IDNs with an
2335
  // appropriate `xn--` prefix to be decoded.
2336
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2337
      context_.host.length() > 0) {
2338
    std::string unicode_host;
2339
    if (!ToUnicode(context_.host, &unicode_host)) {
2340
      return "";
2341
    }
2342
    return "\\\\" + unicode_host + decoded_path;
2343
  }
2344
  // Otherwise, it's a local path that requires a drive letter.
2345
  if (decoded_path.length() < 3) {
2346
    return "";
2347
  }
2348
  if (decoded_path[2] != ':' ||
2349
      !IsASCIIAlpha(decoded_path[1])) {
2350
    return "";
2351
  }
2352
  // Strip out the leading '\'.
2353
  return decoded_path.substr(1);
2354
#else
2355
2192
  return decoded_path;
2356
#endif
2357
}
2358
2359
39409
// Builds a "file://" URL from `file_path`. Every '%' in the path is rewritten
// to "%25" before the text is handed to the parser, which is started in the
// kPathStart state on top of the freshly constructed URL's context.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped_file_path;
  for (const char ch : file_path) {
    if (ch == '%') {
      escaped_file_path.append("%25");
    } else {
      escaped_file_path.push_back(ch);
    }
  }

  URL::Parse(escaped_file_path.c_str(),
             escaped_file_path.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
2371
2372
// This function works by calling out to a JS function that creates and
2373
// returns the JS URL object. Be mindful of the JS<->Native boundary
2374
// crossing that is required.
2375
194
MaybeLocal<Value> URL::ToObject(Environment* env) const {
2376
194
  Isolate* isolate = env->isolate();
2377
194
  Local<Context> context = env->context();
2378
  Context::Scope context_scope(context);
2379
2380
  const Local<Value> undef = Undefined(isolate);
2381
  const Local<Value> null = Null(isolate);
2382
2383
194
  if (context_.flags & URL_FLAGS_FAILED)
2384
    return Local<Value>();
2385
2386
  Local<Value> argv[] = {
2387
    undef,
2388
    undef,
2389
    undef,
2390
    undef,
2391
    null,  // host defaults to null
2392
    null,  // port defaults to null
2393
    undef,
2394
    null,  // query defaults to null
2395
    null,  // fragment defaults to null
2396
194
  };
2397
194
  SetArgs(env, argv, context_);
2398
2399
  MaybeLocal<Value> ret;
2400
  {
2401
194
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
2402
2403
    // The SetURLConstructor method must have been called already to
2404
    // set the constructor function used below. SetURLConstructor is
2405
    // called automatically when the internal/url.js module is loaded
2406
    // during the internal/bootstrap/node.js processing.
2407
    ret = env->url_constructor_function()
2408
388
        ->Call(env->context(), undef, arraysize(argv), argv);
2409
  }
2410
2411
194
  return ret;
2412
}
2413
2414
5099
static void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2415
5099
  Environment* env = Environment::GetCurrent(args);
2416
5099
  CHECK_EQ(args.Length(), 1);
2417
10198
  CHECK(args[0]->IsFunction());
2418
10198
  env->set_url_constructor_function(args[0].As<Function>());
2419
5099
}
2420
2421
5099
static void Initialize(Local<Object> target,
2422
                       Local<Value> unused,
2423
                       Local<Context> context,
2424
                       void* priv) {
2425
5099
  Environment* env = Environment::GetCurrent(context);
2426
5099
  env->SetMethod(target, "parse", Parse);
2427
5099
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
2428
5099
  env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString);
2429
5099
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
2430
5099
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
2431
5099
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
2432
2433
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
2434
265148
  FLAGS(XX)
2435
#undef XX
2436
2437
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
2438
428316
  PARSESTATES(XX)
2439
#undef XX
2440
5099
}
2441
}  // namespace url
2442
}  // namespace node
2443
2444
4953
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)