GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/node_url.cc Lines: 1162 1206 96.4 %
Date: 2020-05-27 22:15:15 Branches: 1082 1202 90.0 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_i18n.h"
5
#include "util-inl.h"
6
7
#include <cmath>
8
#include <cstdio>
9
#include <string>
10
#include <vector>
11
12
namespace node {
13
14
using errors::TryCatchScope;
15
16
using v8::Array;
17
using v8::Context;
18
using v8::Function;
19
using v8::FunctionCallbackInfo;
20
using v8::HandleScope;
21
using v8::Int32;
22
using v8::Integer;
23
using v8::Isolate;
24
using v8::Local;
25
using v8::MaybeLocal;
26
using v8::NewStringType;
27
using v8::Null;
28
using v8::Object;
29
using v8::String;
30
using v8::Undefined;
31
using v8::Value;
32
33
160320
// Creates a V8 string from the bytes of `str`, which are passed to
// String::NewFromUtf8() together with an explicit length (so `str` does not
// need to be NUL-terminated at that length). The MaybeLocal result is
// unwrapped with ToLocalChecked(); no failure is reported to the caller.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
39
40
namespace url {
41
42
namespace {
43
44
// https://url.spec.whatwg.org/#eof-code-point
// Stored as a plain `char` with value -1. NOTE(review): presumably the parser
// substitutes this for "no more input"; confirm against its users.
45
constexpr char kEOL = -1;
46
47
// Used in ToUSVString().
// Code unit U+FFFD (a single UTF-16 unit, hence char16_t).
48
constexpr char16_t kUnicodeReplacementCharacter = 0xFFFD;
49
50
// https://url.spec.whatwg.org/#concept-host
51
2357
// Result of host parsing, stored as a hand-rolled tagged union: `type_`
// records which member of `value_` is currently live. The object starts in
// the H_FAILED state and returns to it whenever Reset() runs.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True when `type_` is H_FAILED (the initial state and the state left
  // behind by Reset()).
  bool ParsingFailed() const { return HostType::H_FAILED == type_; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    // Only the trivially destructible `ipv4` member is initialized here, and
    // the destructor is intentionally empty: URLHost constructs and destroys
    // the string member explicitly.
    Value() : ipv4(0) {}
    ~Value() {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Whether the string member of `value_` is the live one.
  bool HoldsString() const {
    return type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
  }

  // Destroys the string member if it is live, then marks the host as failed.
  void Reset() {
    if (HoldsString()) {
      using string = std::string;
      value_.domain_or_opaque.~string();
    }
    type_ = HostType::H_FAILED;
  }

  // Assigning to the union's string member with `=` would only be correct if
  // that member already held a constructed string. Historically this happened
  // to work because ParseIPv6Host() zero-fills `value_`, which leans on
  // standard library internals. The setters below therefore always Reset()
  // and then placement-new a fresh string. Not storing strings in a union at
  // all might be the better long-term answer.
  void EmplaceString(HostType new_type, std::string&& text) {
    Reset();
    type_ = new_type;
    new (&value_.domain_or_opaque) std::string(std::move(text));
  }

  void SetOpaque(std::string&& string) {
    EmplaceString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    EmplaceString(HostType::H_DOMAIN, std::move(string));
  }
};

// Releases the string member, if any, through Reset().
URLHost::~URLHost() {
  Reset();
}
126
127
// X-macro: applies XX to each argument name in order. `enum url_cb_args`
// expands it into enumerators, so ARG_COUNT (kept last) ends up equal to the
// number of names listed before it.
#define ARGS(XX)                                                              \
128
  XX(ARG_FLAGS)                                                               \
129
  XX(ARG_PROTOCOL)                                                            \
130
  XX(ARG_USERNAME)                                                            \
131
  XX(ARG_PASSWORD)                                                            \
132
  XX(ARG_HOST)                                                                \
133
  XX(ARG_PORT)                                                                \
134
  XX(ARG_PATH)                                                                \
135
  XX(ARG_QUERY)                                                               \
136
  XX(ARG_FRAGMENT)                                                            \
137
  XX(ARG_COUNT)  // This one has to be last.
138
139
// X-macro: applies XX to each error-argument name in order. It is expanded
// into the enumerators of `enum url_error_cb_args`.
#define ERR_ARGS(XX)                                                          \
140
  XX(ERR_ARG_FLAGS)                                                           \
141
  XX(ERR_ARG_INPUT)                                                           \
142
143
// Enumerators are generated from ARGS: ARG_FLAGS is 0 and each following
// name is one higher, ending with ARG_COUNT.
enum url_cb_args {
144
#define XX(name) name,
145
  ARGS(XX)
146
#undef XX
147
};
148
149
// Enumerators are generated from ERR_ARGS: ERR_ARG_FLAGS is 0 and
// ERR_ARG_INPUT is 1.
enum url_error_cb_args {
150
#define XX(name) name,
151
  ERR_ARGS(XX)
152
#undef XX
153
};
154
155
// Defines a function template `name(ch)` that returns `expr`, which is
// written in terms of the parameter `ch`.
// `bits` is the minimum character width: the static_assert rejects any T
// smaller than bits / 8 bytes (a T of exactly that size is accepted).
#define CHAR_TEST(bits, name, expr)                                           \
156
  template <typename T>                                                       \
157
  bool name(const T ch) {                                              \
158
    static_assert(sizeof(ch) >= (bits) / 8,                                   \
159
                  "Character must be wider than " #bits " bits");             \
160
    return (expr);                                                            \
161
  }
162
163
// Defines two overloads of `name`:
//  - name(ch1, ch2) returns `expr`, written in terms of `ch1` and `ch2`;
//  - name(str) takes a std::basic_string and applies the first overload to
//    str[0] and str[1], returning false when str has fewer than two
//    characters.
// Both overloads static_assert that the character type is at least
// bits / 8 bytes wide.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
164
  template <typename T>                                                       \
165
  bool name(const T ch1, const T ch2) {                                \
166
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
167
                  "Character must be wider than " #bits " bits");             \
168
    return (expr);                                                            \
169
  }                                                                           \
170
  template <typename T>                                                       \
171
  bool name(const std::basic_string<T>& str) {                         \
172
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
173
                  "Character must be wider than " #bits " bits");             \
174
    return str.length() >= 2 && name(str[0], str[1]);                         \
175
  }
176
177
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
// True for '\t', '\n' and '\r'.
178

16355588
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
179
180
// https://infra.spec.whatwg.org/#c0-control-or-space
// True for every value from '\0' up to and including ' '.
181

267953
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
182
183
// https://infra.spec.whatwg.org/#ascii-digit
// True for '0' through '9'.
184

536223
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))
185
186
// https://infra.spec.whatwg.org/#ascii-hex-digit
// True for ASCII digits and for the letters A-F in either case.
187


910
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
188
                               (ch >= 'A' && ch <= 'F') ||
189
                               (ch >= 'a' && ch <= 'f')))
190
191
// https://infra.spec.whatwg.org/#ascii-alpha
// True for 'A'-'Z' and 'a'-'z'.
192


1194899
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
193
                            (ch >= 'a' && ch <= 'z')))
194
195
// https://infra.spec.whatwg.org/#ascii-alphanumeric
// True when either IsASCIIDigit or IsASCIIAlpha is true.
196

529511
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))
197
198
// https://infra.spec.whatwg.org/#ascii-lowercase
199
// Maps an ASCII letter to its lowercase form by setting bit 0x20; any value
// that IsASCIIAlpha() rejects is returned untouched.
template <typename T>
T ASCIILowercase(T ch) {
  if (!IsASCIIAlpha(ch)) {
    return ch;
  }
  return ch | 0x20;
}
203
204
// https://url.spec.whatwg.org/#forbidden-host-code-point
// True for exactly the characters enumerated in the expression below.
205








65852
CHAR_TEST(8, IsForbiddenHostCodePoint,
206
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
207
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
208
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
209
          ch == '<' || ch == '>' || ch == '\\' || ch == ']' ||
210
          ch == '^')
211
212
// https://url.spec.whatwg.org/#windows-drive-letter
// True when the first character is an ASCII letter and the second is ':'
// or '|'.
213


1645
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
214
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))
215
216
// https://url.spec.whatwg.org/#normalized-windows-drive-letter
// Same as IsWindowsDriveLetter, but only ':' is accepted as second character.
217


526
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
218
                     (IsASCIIAlpha(ch1) && ch2 == ':'))
219
220
// If a UTF-16 character is a low/trailing surrogate.
// The mask 0xFC00 keeps the top six bits, so this matches 0xDC00-0xDFFF.
221
1
CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00)
222
223
// If a UTF-16 character is a surrogate.
// The mask 0xF800 keeps the top five bits, so this matches 0xD800-0xDFFF.
224
21
CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800)
225
226
// If a UTF-16 surrogate is a low/trailing one.
// Tests bit 0x400 only, so the result is meaningful only for values already
// known to be surrogates.
227
13
CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)
228
229
#undef CHAR_TEST
230
#undef TWO_CHAR_STRING_TEST
231
232
// Percent-encoded spelling of every byte value: hex[b] is the NUL-terminated
// string "%XX", where XX is `b` written as two upper-case hexadecimal digits.
// The array elements are declared const as well as the characters they point
// to, so no entry can be re-pointed after initialization.
const char* const hex[256] = {
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
266
267
// 256-bit membership bitmap over byte values: entry i covers bytes
// 8*i .. 8*i+7, and bit k (value 1 << k) of that entry is set when byte
// 8*i+k belongs to the set.
const uint8_t C0_CONTROL_ENCODE_SET[32] = {
  // 0x00-0x1F: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x77: none.
  0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00,
  // 0x78-0x7F: only 0x7F.
  0x80,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF
};
333
334
// 256-bit membership bitmap over byte values: entry i covers bytes
// 8*i .. 8*i+7, and bit k (value 1 << k) of that entry is set when byte
// 8*i+k belongs to the set.
const uint8_t FRAGMENT_ENCODE_SET[32] = {
  // 0x00-0x1F: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: 0x20 (space) and 0x22 (").
  0x05,
  // 0x28-0x37: none.
  0x00, 0x00,
  // 0x38-0x3F: 0x3C (<) and 0x3E (>).
  0x50,
  // 0x40-0x5F: none.
  0x00, 0x00, 0x00, 0x00,
  // 0x60-0x67: 0x60 (`).
  0x01,
  // 0x68-0x77: none.
  0x00, 0x00,
  // 0x78-0x7F: only 0x7F.
  0x80,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF
};
400
401
402
// 256-bit membership bitmap over byte values: entry i covers bytes
// 8*i .. 8*i+7, and bit k (value 1 << k) of that entry is set when byte
// 8*i+k belongs to the set.
const uint8_t PATH_ENCODE_SET[32] = {
  // 0x00-0x1F: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: 0x20 (space), 0x22 (") and 0x23 (#).
  0x0D,
  // 0x28-0x37: none.
  0x00, 0x00,
  // 0x38-0x3F: 0x3C (<), 0x3E (>) and 0x3F (?).
  0xD0,
  // 0x40-0x5F: none.
  0x00, 0x00, 0x00, 0x00,
  // 0x60-0x67: 0x60 (`).
  0x01,
  // 0x68-0x77: none.
  0x00, 0x00,
  // 0x78-0x7F: 0x7B ({), 0x7D (}) and 0x7F.
  0xA8,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF
};
468
469
// 256-bit membership bitmap over byte values: entry i covers bytes
// 8*i .. 8*i+7, and bit k (value 1 << k) of that entry is set when byte
// 8*i+k belongs to the set.
const uint8_t USERINFO_ENCODE_SET[32] = {
  // 0x00-0x1F: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: 0x20 (space), 0x22 (") and 0x23 (#).
  0x0D,
  // 0x28-0x2F: 0x2F (/).
  0x80,
  // 0x30-0x37: none.
  0x00,
  // 0x38-0x3F: 0x3A (:), 0x3B (;), 0x3C (<), 0x3D (=), 0x3E (>), 0x3F (?).
  0xFC,
  // 0x40-0x47: 0x40 (@).
  0x01,
  // 0x48-0x57: none.
  0x00, 0x00,
  // 0x58-0x5F: 0x5B ([), 0x5C (backslash), 0x5D (]) and 0x5E (^).
  0x78,
  // 0x60-0x67: 0x60 (`).
  0x01,
  // 0x68-0x77: none.
  0x00, 0x00,
  // 0x78-0x7F: 0x7B ({), 0x7C (|), 0x7D (}) and 0x7F.
  0xB8,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF
};
535
536
// 256-bit membership bitmap over byte values: entry i covers bytes
// 8*i .. 8*i+7, and bit k (value 1 << k) of that entry is set when byte
// 8*i+k belongs to the set.
const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32] = {
  // 0x00-0x1F: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: 0x20 (space), 0x22 (") and 0x23 (#).
  0x0D,
  // 0x28-0x37: none.
  0x00, 0x00,
  // 0x38-0x3F: 0x3C (<) and 0x3E (>).
  0x50,
  // 0x40-0x77: none.
  0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00,
  // 0x78-0x7F: only 0x7F.
  0x80,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF
};
602
603
// Same as QUERY_ENCODE_SET_NONSPECIAL, but with 0x27 (') encoded.
//
// Bitmap with one bit per byte value: entry i covers the values 8*i through
// 8*i + 7, least-significant bit first. A set bit marks a byte that gets
// percent-encoded.
const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = {
  0xFF, 0xFF, 0xFF, 0xFF,  // 00-1F: all set
  0x8D,                    // 20-27: 0x20 (space), 0x22 ("), 0x23 (#), 0x27 (')
  0x00, 0x00,              // 28-37: none
  0x50,                    // 38-3F: 0x3C (<), 0x3E (>)
  0x00, 0x00, 0x00, 0x00,  // 40-5F: none
  0x00, 0x00, 0x00,        // 60-77: none
  0x80,                    // 78-7F: 0x7F only
  0xFF, 0xFF, 0xFF, 0xFF,  // 80-9F: all set
  0xFF, 0xFF, 0xFF, 0xFF,  // A0-BF: all set
  0xFF, 0xFF, 0xFF, 0xFF,  // C0-DF: all set
  0xFF, 0xFF, 0xFF, 0xFF   // E0-FF: all set
};
670
671
13912894
// Tests bit number i of the bitmap a. Bit i lives in byte i / 8 at position
// i % 8, counting from the least-significant bit.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t cell = a[i / 8];
  const int mask = 1 << (i % 8);
  return (cell & mask) != 0;
}
674
675
// Appends ch to str. If ch position in encode_set is set, the ch will
676
// be percent-encoded then appended.
677
13912897
void AppendOrEscape(std::string* str,
678
                           const unsigned char ch,
679
                           const uint8_t encode_set[]) {
680
13912897
  if (BitAt(encode_set, ch))
681
776
    *str += hex[ch];
682
  else
683
13912122
    *str += ch;
684
13912897
}
685
686
// Converts one hexadecimal digit character (0-9, A-F, a-f) to its numeric
// value. Any other character yields static_cast<unsigned>(-1).
template <typename T>
unsigned hex2bin(const T ch) {
  unsigned value = static_cast<unsigned>(-1);
  if (ch >= '0' && ch <= '9') {
    value = ch - '0';
  } else if (ch >= 'A' && ch <= 'F') {
    value = 10 + (ch - 'A');
  } else if (ch >= 'a' && ch <= 'f') {
    value = 10 + (ch - 'a');
  }
  return value;
}
696
697
2111
std::string PercentDecode(const char* input, size_t len) {
698
2111
  std::string dest;
699
2111
  if (len == 0)
700
2
    return dest;
701
2109
  dest.reserve(len);
702
2109
  const char* pointer = input;
703
2109
  const char* end = input + len;
704
705
132811
  while (pointer < end) {
706
65351
    const char ch = pointer[0];
707
65351
    size_t remaining = end - pointer - 1;
708


130519
    if (ch != '%' || remaining < 2 ||
709
192
        (ch == '%' &&
710
381
         (!IsASCIIHexDigit(pointer[1]) ||
711
189
          !IsASCIIHexDigit(pointer[2])))) {
712
65168
      dest += ch;
713
65168
      pointer++;
714
65168
      continue;
715
    } else {
716
183
      unsigned a = hex2bin(pointer[1]);
717
183
      unsigned b = hex2bin(pointer[2]);
718
183
      char c = static_cast<char>(a * 16 + b);
719
183
      dest += c;
720
183
      pointer += 3;
721
    }
722
  }
723
2109
  return dest;
724
}
725
726
// X-macro listing the URL schemes treated as "special". Each row is
// XX(key, port, name):
//   key  - identifier fragment pasted into env->url_special_<key>_string()
//          by GetSpecial();
//   port - the port NormalizePort() collapses to -1 for that scheme
//          (file: is listed with -1);
//   name - the scheme string, including the trailing ':', that callers
//          compare against.
#define SPECIALS(XX)                                                          \
727
  XX(ftp, 21, "ftp:")                                                         \
728
  XX(file, -1, "file:")                                                       \
729
  XX(gopher, 70, "gopher:")                                                   \
730
  XX(http, 80, "http:")                                                       \
731
  XX(https, 443, "https:")                                                    \
732
  XX(ws, 80, "ws:")                                                           \
733
  XX(wss, 443, "wss:")
734
735
339247
bool IsSpecial(const std::string& scheme) {
736
#define V(_, __, name) if (scheme == name) return true;
737



339247
  SPECIALS(V);
738
#undef V
739
1166
  return false;
740
}
741
742
159594
// Maps a special scheme name to the matching env->url_special_<key>_string()
// value. The scheme must be one of the names in SPECIALS; any other input
// reaches UNREACHABLE().
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define RETURN_IF_SCHEME(key, port, name)                                     \
  if (scheme == name) {                                                       \
    return env->url_special_##key##_string();                                 \
  }
  SPECIALS(RETURN_IF_SCHEME)
#undef RETURN_IF_SCHEME
  UNREACHABLE();
}
749
750
131487
int NormalizePort(const std::string& scheme, int p) {
751
#define V(_, port, name) if (scheme == name && p == port) return -1;
752










131487
  SPECIALS(V);
753
#undef V
754
3066
  return p;
755
}
756
757
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
758
1584
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
759
1584
  size_t length = end - p;
760
1559
  return length >= 2 &&
761

1613
    IsWindowsDriveLetter(p[0], p[1]) &&
762
14
    (length == 2 ||
763
21
      p[2] == '/' ||
764
10
      p[2] == '\\' ||
765
5
      p[2] == '?' ||
766
1586
      p[2] == '#');
767
}
768
769
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs input through i18n::ToUnicode() and stores the result in *output.
// Returns false, leaving *output untouched, when the conversion reports a
// negative status.
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const auto status =
      i18n::ToUnicode(&converted, input.c_str(), input.length());
  if (status < 0)
    return false;
  output->assign(*converted, converted.length());
  return true;
}

// Runs input through i18n::ToASCII() and stores the result in *output.
// Returns false, leaving *output untouched, when the conversion reports a
// negative status.
bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const auto status =
      i18n::ToASCII(&converted, input.c_str(), input.length());
  if (status < 0)
    return false;
  output->assign(*converted, converted.length());
  return true;
}
#else
// Without ICU both conversions are deliberate pass-throughs: the input is
// copied to *output and the call always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
  *output = input;
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  *output = input;
  return true;
}
#endif
797
798
91
void URLHost::ParseIPv6Host(const char* input, size_t length) {
799
91
  CHECK_EQ(type_, HostType::H_FAILED);
800
91
  unsigned size = arraysize(value_.ipv6);
801
819
  for (unsigned n = 0; n < size; n++)
802
728
    value_.ipv6[n] = 0;
803
91
  uint16_t* piece_pointer = &value_.ipv6[0];
804
91
  uint16_t* const buffer_end = piece_pointer + size;
805
91
  uint16_t* compress_pointer = nullptr;
806
91
  const char* pointer = input;
807
91
  const char* end = pointer + length;
808
  unsigned value, len, numbers_seen;
809
91
  char ch = pointer < end ? pointer[0] : kEOL;
810
91
  if (ch == ':') {
811

33
    if (length < 2 || pointer[1] != ':')
812
3
      return;
813
30
    pointer += 2;
814
30
    ch = pointer < end ? pointer[0] : kEOL;
815
30
    piece_pointer++;
816
30
    compress_pointer = piece_pointer;
817
  }
818
404
  while (ch != kEOL) {
819
219
    if (piece_pointer >= buffer_end)
820
3
      return;
821
216
    if (ch == ':') {
822
17
      if (compress_pointer != nullptr)
823
3
        return;
824
14
      pointer++;
825
14
      ch = pointer < end ? pointer[0] : kEOL;
826
14
      piece_pointer++;
827
14
      compress_pointer = piece_pointer;
828
14
      continue;
829
    }
830
199
    value = 0;
831
199
    len = 0;
832

623
    while (len < 4 && IsASCIIHexDigit(ch)) {
833
212
      value = value * 0x10 + hex2bin(ch);
834
212
      pointer++;
835
212
      ch = pointer < end ? pointer[0] : kEOL;
836
212
      len++;
837
    }
838

199
    switch (ch) {
839
      case '.':
840
43
        if (len == 0)
841
3
          return;
842
40
        pointer -= len;
843
40
        ch = pointer < end ? pointer[0] : kEOL;
844
40
        if (piece_pointer > buffer_end - 2)
845
3
          return;
846
37
        numbers_seen = 0;
847
223
        while (ch != kEOL) {
848
123
          value = 0xffffffff;
849
123
          if (numbers_seen > 0) {
850

86
            if (ch == '.' && numbers_seen < 4) {
851
78
              pointer++;
852
78
              ch = pointer < end ? pointer[0] : kEOL;
853
            } else {
854
8
              return;
855
            }
856
          }
857
115
          if (!IsASCIIDigit(ch))
858
16
            return;
859
343
          while (IsASCIIDigit(ch)) {
860
128
            unsigned number = ch - '0';
861
128
            if (value == 0xffffffff) {
862
99
              value = number;
863
29
            } else if (value == 0) {
864
3
              return;
865
            } else {
866
26
              value = value * 10 + number;
867
            }
868
125
            if (value > 255)
869
3
              return;
870
122
            pointer++;
871
122
            ch = pointer < end ? pointer[0] : kEOL;
872
          }
873
93
          *piece_pointer = *piece_pointer * 0x100 + value;
874
93
          numbers_seen++;
875

93
          if (numbers_seen == 2 || numbers_seen == 4)
876
37
            piece_pointer++;
877
        }
878
7
        if (numbers_seen != 4)
879
3
          return;
880
4
        continue;
881
      case ':':
882
125
        pointer++;
883
125
        ch = pointer < end ? pointer[0] : kEOL;
884
125
        if (ch == kEOL)
885
3
          return;
886
122
        break;
887
      case kEOL:
888
18
        break;
889
      default:
890
13
        return;
891
    }
892
140
    *piece_pointer = value;
893
140
    piece_pointer++;
894
  }
895
896
27
  if (compress_pointer != nullptr) {
897
18
    unsigned swaps = piece_pointer - compress_pointer;
898
18
    piece_pointer = buffer_end - 1;
899

58
    while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
900
20
      uint16_t temp = *piece_pointer;
901
20
      uint16_t* swap_piece = compress_pointer + swaps - 1;
902
20
      *piece_pointer = *swap_piece;
903
20
      *swap_piece = temp;
904
20
       piece_pointer--;
905
20
       swaps--;
906
    }
907

9
  } else if (compress_pointer == nullptr &&
908
             piece_pointer != buffer_end) {
909
3
    return;
910
  }
911
24
  type_ = HostType::H_IPV6;
912
}
913
914
2105
// Parses the characters in [start, end) as one number of an IPv4 address.
//
// A "0x"/"0X" prefix selects hexadecimal; otherwise a leading '0' followed by
// at least one more character selects octal; everything else is decimal.
// An empty range, or a bare "0x" prefix, yields 0.
//
// Returns the parsed value, saturated at INT64_MAX on overflow, or -1 when a
// character is not a valid digit for the selected radix. Only bytes inside
// [start, end) are read, so the range does not have to be followed by a
// non-digit terminator.
int64_t ParseNumber(const char* start, const char* end) {
  unsigned radix = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    radix = 16;
  }
  if (end - start == 0) {
    return 0;
  } else if (radix == 10 && end - start > 1 && start[0] == '0') {
    start++;
    radix = 8;
  }

  int64_t result = 0;
  for (const char* p = start; p < end; p++) {
    const char ch = p[0];
    unsigned digit;
    if (ch >= '0' && ch <= '9') {
      digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      digit = 10 + (ch - 'a');
    } else if (ch >= 'A' && ch <= 'F') {
      digit = 10 + (ch - 'A');
    } else {
      return -1;
    }
    if (digit >= radix)
      return -1;

    // Saturate instead of overflowing; keep scanning so that a later invalid
    // character is still reported as -1.
    const int64_t limit =
        (INT64_MAX - static_cast<int64_t>(digit)) / static_cast<int64_t>(radix);
    if (result > limit) {
      result = INT64_MAX;
    } else {
      result = result * static_cast<int64_t>(radix) +
               static_cast<int64_t>(digit);
    }
  }
  return result;
}
948
949
1970
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
950
1970
  CHECK_EQ(type_, HostType::H_FAILED);
951
1970
  *is_ipv4 = false;
952
1970
  const char* pointer = input;
953
1970
  const char* mark = input;
954
1970
  const char* end = pointer + length;
955
1970
  int parts = 0;
956
1970
  uint32_t val = 0;
957
  uint64_t numbers[4];
958
1970
  int tooBigNumbers = 0;
959
1970
  if (length == 0)
960
1918
    return;
961
962
38380
  while (pointer <= end) {
963
20104
    const char ch = pointer < end ? pointer[0] : kEOL;
964
20104
    int remaining = end - pointer - 1;
965

20104
    if (ch == '.' || ch == kEOL) {
966
2113
      if (++parts > static_cast<int>(arraysize(numbers)))
967
2
        return;
968
2111
      if (pointer == mark)
969
6
        return;
970
2105
      int64_t n = ParseNumber(mark, pointer);
971
2105
      if (n < 0)
972
1889
        return;
973
974
216
      if (n > 255) {
975
69
        tooBigNumbers++;
976
      }
977
216
      numbers[parts - 1] = n;
978
216
      mark = pointer + 1;
979

216
      if (ch == '.' && remaining == 0)
980
2
        break;
981
    }
982
18205
    pointer++;
983
  }
984
73
  CHECK_GT(parts, 0);
985
73
  *is_ipv4 = true;
986
987
  // If any but the last item in numbers is greater than 255, return failure.
988
  // If the last item in numbers is greater than or equal to
989
  // 256^(5 - the number of items in numbers), return failure.
990

143
  if (tooBigNumbers > 1 ||
991

181
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
992
67
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
993
21
    return;
994
  }
995
996
52
  type_ = HostType::H_IPV4;
997
52
  val = numbers[parts - 1];
998
137
  for (int n = 0; n < parts - 1; n++) {
999
85
    double b = 3 - n;
1000
85
    val += numbers[n] * pow(256, b);
1001
  }
1002
1003
52
  value_.ipv4 = val;
1004
}
1005
1006
162
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
1007
162
  CHECK_EQ(type_, HostType::H_FAILED);
1008
295
  std::string output;
1009
162
  output.reserve(length);
1010
1005
  for (size_t i = 0; i < length; i++) {
1011
872
    const char ch = input[i];
1012

872
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
1013
29
      return;
1014
    } else {
1015
843
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
1016
    }
1017
  }
1018
1019
133
  SetOpaque(std::move(output));
1020
}
1021
1022
2357
void URLHost::ParseHost(const char* input,
1023
                        size_t length,
1024
                        bool is_special,
1025
                        bool unicode) {
1026
2357
  CHECK_EQ(type_, HostType::H_FAILED);
1027
2357
  const char* pointer = input;
1028
1029
2357
  if (length == 0)
1030
460
    return;
1031
1032
2357
  if (pointer[0] == '[') {
1033
97
    if (pointer[length - 1] != ']')
1034
6
      return;
1035
91
    return ParseIPv6Host(++pointer, length - 2);
1036
  }
1037
1038
2260
  if (!is_special)
1039
162
    return ParseOpaqueHost(input, length);
1040
1041
  // First, we have to percent decode
1042
3995
  std::string decoded = PercentDecode(input, length);
1043
1044
  // Then we have to punycode toASCII
1045
2098
  if (!ToASCII(decoded, &decoded))
1046
69
    return;
1047
1048
  // If any of the following characters are still present, we have to fail
1049
66955
  for (size_t n = 0; n < decoded.size(); n++) {
1050
64985
    const char ch = decoded[n];
1051
64985
    if (IsForbiddenHostCodePoint(ch)) {
1052
59
      return;
1053
    }
1054
  }
1055
1056
  // Check to see if it's an IPv4 IP address
1057
  bool is_ipv4;
1058
1970
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
1059
1970
  if (is_ipv4)
1060
73
    return;
1061
1062
  // If the unicode flag is set, run the result through punycode ToUnicode
1063

1897
  if (unicode && !ToUnicode(decoded, &decoded))
1064
    return;
1065
1066
  // It's not an IPv4 or IPv6 address, it must be a domain
1067
1897
  SetDomain(std::move(decoded));
1068
}
1069
1070
// Locates the longest sequence of 0 segments in an IPv6 address
// in order to use the :: compression when serializing.
// Returns a pointer to the first element of the longest run of zeros among
// the len elements at values. Only runs of at least two zeros qualify, the
// earliest run wins a tie, and nullptr is returned when no run qualifies.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* const stop = values + len;
  T* best = nullptr;
  unsigned best_length = 1;
  T* run = nullptr;
  unsigned run_length = 0;

  for (T* it = values; it < stop; ++it) {
    if (*it == 0) {
      if (run == nullptr)
        run = it;
      ++run_length;
      continue;
    }
    // A non-zero element closes the current run, if any.
    if (run_length > best_length) {
      best_length = run_length;
      best = run;
    }
    run_length = 0;
    run = nullptr;
  }
  // The last run may extend to the end of the buffer.
  if (run_length > best_length)
    best = run;
  return best;
}
1100
1101
2106
std::string URLHost::ToStringMove() {
1102
2106
  std::string return_value;
1103
2106
  switch (type_) {
1104
    case HostType::H_DOMAIN:
1105
    case HostType::H_OPAQUE:
1106
2030
      return_value = std::move(value_.domain_or_opaque);
1107
2030
      break;
1108
    default:
1109
76
      return_value = ToString();
1110
76
      break;
1111
  }
1112
2106
  Reset();
1113
2106
  return return_value;
1114
}
1115
1116
76
std::string URLHost::ToString() const {
1117
152
  std::string dest;
1118

76
  switch (type_) {
1119
    case HostType::H_DOMAIN:
1120
    case HostType::H_OPAQUE:
1121
      return value_.domain_or_opaque;
1122
      break;
1123
    case HostType::H_IPV4: {
1124
52
      dest.reserve(15);
1125
52
      uint32_t value = value_.ipv4;
1126
260
      for (int n = 0; n < 4; n++) {
1127
        char buf[4];
1128
208
        snprintf(buf, sizeof(buf), "%d", value % 256);
1129
208
        dest.insert(0, buf);
1130
208
        if (n < 3)
1131
156
          dest.insert(0, 1, '.');
1132
208
        value /= 256;
1133
      }
1134
52
      break;
1135
    }
1136
    case HostType::H_IPV6: {
1137
24
      dest.reserve(41);
1138
24
      dest += '[';
1139
24
      const uint16_t* start = &value_.ipv6[0];
1140
      const uint16_t* compress_pointer =
1141
24
          FindLongestZeroSequence(start, 8);
1142
24
      bool ignore0 = false;
1143
216
      for (int n = 0; n <= 7; n++) {
1144
192
        const uint16_t* piece = &value_.ipv6[n];
1145

192
        if (ignore0 && *piece == 0)
1146
246
          continue;
1147
80
        else if (ignore0)
1148
19
          ignore0 = false;
1149
80
        if (compress_pointer == piece) {
1150
22
          dest += n == 0 ? "::" : ":";
1151
22
          ignore0 = true;
1152
22
          continue;
1153
        }
1154
        char buf[5];
1155
58
        snprintf(buf, sizeof(buf), "%x", *piece);
1156
58
        dest += buf;
1157
58
        if (n < 7)
1158
37
          dest += ':';
1159
      }
1160
24
      dest += ']';
1161
24
      break;
1162
    }
1163
    case HostType::H_FAILED:
1164
      break;
1165
  }
1166
76
  return dest;
1167
}
1168
1169
1975
bool ParseHost(const std::string& input,
1170
               std::string* output,
1171
               bool is_special,
1172
               bool unicode = false) {
1173
1975
  if (input.length() == 0) {
1174
42
    output->clear();
1175
42
    return true;
1176
  }
1177
3866
  URLHost host;
1178
1933
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
1179
1933
  if (host.ParsingFailed())
1180
233
    return false;
1181
1700
  *output = host.ToStringMove();
1182
1700
  return true;
1183
}
1184
1185
2574
std::vector<std::string> FromJSStringArray(Environment* env,
1186
                                           Local<Array> array) {
1187
2574
  std::vector<std::string> vec;
1188
2574
  if (array->Length() > 0)
1189
2566
    vec.reserve(array->Length());
1190
33820
  for (size_t n = 0; n < array->Length(); n++) {
1191
43008
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
1192
28672
    if (val->IsString()) {
1193
28672
      Utf8Value value(env->isolate(), val.As<String>());
1194
14336
      vec.emplace_back(*value, value.length());
1195
    }
1196
  }
1197
2574
  return vec;
1198
}
1199
1200
2574
// Builds a url_data from the properties of the JS object base_obj.
// flags and port are copied when they are Int32 values, scheme is always
// converted to UTF-8, and username/password/host/query/fragment are copied
// when they are strings. The matching URL_FLAGS_HAS_* bit is set for a copied
// string; for username and password only when the string is non-empty.
// A path array is converted with FromJSStringArray().
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();

  // Reads one property of base_obj; the lookup is expected to succeed.
  const auto read = [&](Local<Value> key) -> Local<Value> {
    return base_obj->Get(env->context(), key).ToLocalChecked();
  };

  Local<Value> flags = read(env->flags_string());
  if (flags->IsInt32())
    base.flags = flags->Int32Value(context).FromJust();

  Local<Value> port = read(env->port_string());
  if (port->IsInt32())
    base.port = port->Int32Value(context).FromJust();

  Local<Value> scheme = read(env->scheme_string());
  base.scheme = Utf8Value(env->isolate(), scheme).out();

  // Copies the string property `name` into base.*member and raises `flag`.
  // With empty_as_present == false an empty string does not raise the flag.
  const auto copy_string = [&](std::string url_data::*member,
                               int flag,
                               Local<String> name,
                               bool empty_as_present) {
    Local<Value> value = read(name);
    if (!value->IsString())
      return;
    Utf8Value utf8value(env->isolate(), value.As<String>());
    (base.*member).assign(*utf8value, utf8value.length());
    if (empty_as_present || value.As<String>()->Length() != 0)
      base.flags |= flag;
  };
  copy_string(&url_data::username,
              URL_FLAGS_HAS_USERNAME,
              env->username_string(),
              false);
  copy_string(&url_data::password,
              URL_FLAGS_HAS_PASSWORD,
              env->password_string(),
              false);
  copy_string(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
  copy_string(&url_data::query,
              URL_FLAGS_HAS_QUERY,
              env->query_string(),
              true);
  copy_string(&url_data::fragment,
              URL_FLAGS_HAS_FRAGMENT,
              env->fragment_string(),
              true);

  Local<Value> path = read(env->path_string());
  if (path->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path.As<Array>());
  }
  return base;
}
1254
1255
53313
// Builds a url_data from the JS object context_obj.
// Only the flag bits in kCopyFlagsMask are taken over from its flags
// property. scheme, port and host are copied when they have the expected
// type. username and password are read only when the corresponding
// URL_FLAGS_HAS_* bit was copied, and must then be strings.
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;

  // Reads one property of context_obj; the lookup is expected to succeed.
  const auto read = [&](Local<Value> key) -> Local<Value> {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  // Stores the UTF-8 form of `from` in *to.
  const auto store_utf8 = [&](Local<Value> from, std::string* to) {
    Utf8Value value(env->isolate(), from);
    to->assign(*value, value.length());
  };

  Local<Value> flags = read(env->flags_string());
  if (flags->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme = read(env->scheme_string());
  if (scheme->IsString())
    store_utf8(scheme, &context.scheme);

  Local<Value> port = read(env->port_string());
  if (port->IsInt32())
    context.port = port.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username = read(env->username_string());
    CHECK(username->IsString());
    store_utf8(username, &context.username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password = read(env->password_string());
    CHECK(password->IsString());
    store_utf8(password, &context.password);
  }

  Local<Value> host = read(env->host_string());
  if (host->IsString())
    store_utf8(host, &context.host);

  return context;
}
1303
1304
// Single dot segment can be ".", "%2e", or "%2E"
1305
3052477
bool IsSingleDotSegment(const std::string& str) {
1306
3052477
  switch (str.size()) {
1307
    case 1:
1308
1505
      return str == ".";
1309
    case 3:
1310
142426
      return str[0] == '%' &&
1311

142422
             str[1] == '2' &&
1312
142422
             ASCIILowercase(str[2]) == 'e';
1313
    default:
1314
2908576
      return false;
1315
  }
1316
}
1317
1318
// Double dot segment can be:
1319
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
1320
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
1321
1527433
bool IsDoubleDotSegment(const std::string& str) {
1322

1527433
  switch (str.size()) {
1323
    case 2:
1324
1772
      return str == "..";
1325
    case 4:
1326

372648
      if (str[0] != '.' && str[0] != '%')
1327
372633
        return false;
1328
25
      return ((str[0] == '.' &&
1329
13
               str[1] == '%' &&
1330
4
               str[2] == '2' &&
1331

33
               ASCIILowercase(str[3]) == 'e') ||
1332
15
              (str[0] == '%' &&
1333
6
               str[1] == '2' &&
1334
6
               ASCIILowercase(str[2]) == 'e' &&
1335
17
               str[3] == '.'));
1336
    case 6:
1337
61877
      return (str[0] == '%' &&
1338
8
              str[1] == '2' &&
1339
6
              ASCIILowercase(str[2]) == 'e' &&
1340
4
              str[3] == '%' &&
1341

61877
              str[4] == '2' &&
1342
61875
              ASCIILowercase(str[5]) == 'e');
1343
    default:
1344
1091141
      return false;
1345
  }
1346
}
1347
1348
2731
void ShortenUrlPath(struct url_data* url) {
1349
2731
  if (url->path.empty()) return;
1350


2781
  if (url->path.size() == 1 && url->scheme == "file:" &&
1351
175
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
1352
2606
  url->path.pop_back();
1353
}
1354
1355
}  // anonymous namespace
1356
1357
212385
void URL::Parse(const char* input,
1358
                size_t len,
1359
                enum url_parse_state state_override,
1360
                struct url_data* url,
1361
                bool has_url,
1362
                const struct url_data* base,
1363
                bool has_base) {
1364
212385
  const char* p = input;
1365
212385
  const char* end = input + len;
1366
1367
212385
  if (!has_url) {
1368
134001
    for (const char* ptr = p; ptr < end; ptr++) {
1369
133978
      if (IsC0ControlOrSpace(*ptr))
1370
28
        p++;
1371
      else
1372
133950
        break;
1373
    }
1374
133998
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
1375
133975
      if (IsC0ControlOrSpace(*ptr))
1376
25
        end--;
1377
      else
1378
133950
        break;
1379
    }
1380
133973
    input = p;
1381
133973
    len = end - p;
1382
  }
1383
1384
  // The spec says we should strip out any ASCII tabs or newlines.
1385
  // In those cases, we create another std::string instance with the filtered
1386
  // contents, but in the general case we avoid the overhead.
1387
423073
  std::string whitespace_stripped;
1388
16567651
  for (const char* ptr = p; ptr < end; ptr++) {
1389
16355291
    if (!IsASCIITabOrNewline(*ptr))
1390
16355266
      continue;
1391
    // Hit tab or newline. Allocate storage, copy what we have until now,
1392
    // and then iterate and filter all similar characters out.
1393
22
    whitespace_stripped.reserve(len - 1);
1394
22
    whitespace_stripped.assign(p, ptr - p);
1395
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
1396
318
    for (ptr = ptr + 1; ptr < end; ptr++) {
1397
296
      if (!IsASCIITabOrNewline(*ptr))
1398
254
        whitespace_stripped += *ptr;
1399
    }
1400
1401
    // Update variables like they should have looked like if the string
1402
    // had been stripped of whitespace to begin with.
1403
22
    input = whitespace_stripped.c_str();
1404
22
    len = whitespace_stripped.size();
1405
22
    p = input;
1406
22
    end = input + len;
1407
22
    break;
1408
  }
1409
1410
212382
  bool atflag = false;  // Set when @ has been seen.
1411
212382
  bool square_bracket_flag = false;  // Set inside of [...]
1412
212382
  bool password_token_seen_flag = false;  // Set after a : after an username.
1413
1414
423070
  std::string buffer;
1415
1416
  // Set the initial parse state.
1417
212398
  const bool has_state_override = state_override != kUnknownState;
1418
212398
  enum url_parse_state state = has_state_override ? state_override :
1419
212398
                                                    kSchemeStart;
1420
1421

212398
  if (state < kSchemeStart || state > kFragment) {
1422
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1423
    return;
1424
  }
1425
1426

33849424
  while (p <= end) {
1427
16820223
    const char ch = p < end ? p[0] : kEOL;
1428
16820223
    bool special = (url->flags & URL_FLAGS_SPECIAL);
1429
    bool cannot_be_base;
1430

16820223
    const bool special_back_slash = (special && ch == '\\');
1431
1432





16820223
    switch (state) {
1433
      case kSchemeStart:
1434
133999
        if (IsASCIIAlpha(ch)) {
1435
131532
          buffer += ASCIILowercase(ch);
1436
131520
          state = kScheme;
1437
2470
        } else if (!has_state_override) {
1438
2467
          state = kNoScheme;
1439
2467
          continue;
1440
        } else {
1441
3
          url->flags |= URL_FLAGS_FAILED;
1442
3
          return;
1443
        }
1444
131520
        break;
1445
      case kScheme:
1446


529511
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1447
397979
          buffer += ASCIILowercase(ch);
1448

131532
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1449

130989
          if (has_state_override && buffer.size() == 0) {
1450
            url->flags |= URL_FLAGS_TERMINATED;
1451
            return;
1452
          }
1453
130989
          buffer += ':';
1454
1455
130989
          bool new_is_special = IsSpecial(buffer);
1456
1457
130989
          if (has_state_override) {
1458

45
            if ((special != new_is_special) ||
1459
14
                ((buffer == "file:") &&
1460
4
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1461
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1462
1
                  (url->port != -1)))) {
1463
13
              url->flags |= URL_FLAGS_TERMINATED;
1464
13
              return;
1465
            }
1466
1467
            // File scheme && (host == empty or null) check left to JS-land
1468
            // as it can be done before even entering C++ binding.
1469
          }
1470
1471
130976
          url->scheme = std::move(buffer);
1472
130976
          url->port = NormalizePort(url->scheme, url->port);
1473
130976
          if (new_is_special) {
1474
130046
            url->flags |= URL_FLAGS_SPECIAL;
1475
130046
            special = true;
1476
          } else {
1477
930
            url->flags &= ~URL_FLAGS_SPECIAL;
1478
930
            special = false;
1479
          }
1480
130976
          buffer.clear();
1481
130976
          if (has_state_override)
1482
8
            return;
1483
130968
          if (url->scheme == "file:") {
1484
128391
            state = kFile;
1485

4229
          } else if (special &&
1486

3100
                     has_base &&
1487
523
                     url->scheme == base->scheme) {
1488
178
            state = kSpecialRelativeOrAuthority;
1489
2399
          } else if (special) {
1490
1474
            state = kSpecialAuthoritySlashes;
1491
925
          } else if (p[1] == '/') {
1492
250
            state = kPathOrAuthority;
1493
250
            p++;
1494
          } else {
1495
675
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1496
675
            url->flags |= URL_FLAGS_HAS_PATH;
1497
675
            url->path.emplace_back("");
1498
675
            state = kCannotBeBase;
1499
130968
          }
1500
543
        } else if (!has_state_override) {
1501
541
          buffer.clear();
1502
541
          state = kNoScheme;
1503
541
          p = input;
1504
541
          continue;
1505
        } else {
1506
2
          url->flags |= URL_FLAGS_FAILED;
1507
2
          return;
1508
        }
1509
528947
        break;
1510
      case kNoScheme:
1511

3005
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1512

3005
        if (!has_base || (cannot_be_base && ch != '#')) {
1513
1235
          url->flags |= URL_FLAGS_FAILED;
1514
1235
          return;
1515

1770
        } else if (cannot_be_base && ch == '#') {
1516
14
          url->scheme = base->scheme;
1517
14
          if (IsSpecial(url->scheme)) {
1518
            url->flags |= URL_FLAGS_SPECIAL;
1519
            special = true;
1520
          } else {
1521
14
            url->flags &= ~URL_FLAGS_SPECIAL;
1522
14
            special = false;
1523
          }
1524
14
          if (base->flags & URL_FLAGS_HAS_PATH) {
1525
14
            url->flags |= URL_FLAGS_HAS_PATH;
1526
14
            url->path = base->path;
1527
          }
1528
14
          if (base->flags & URL_FLAGS_HAS_QUERY) {
1529
2
            url->flags |= URL_FLAGS_HAS_QUERY;
1530
2
            url->query = base->query;
1531
          }
1532
14
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1533
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1534
            url->fragment = base->fragment;
1535
          }
1536
14
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1537
14
          state = kFragment;
1538

3512
        } else if (has_base &&
1539
1756
                   base->scheme != "file:") {
1540
147
          state = kRelative;
1541
147
          continue;
1542
        } else {
1543
1609
          url->scheme = "file:";
1544
1609
          url->flags |= URL_FLAGS_SPECIAL;
1545
1609
          special = true;
1546
1609
          state = kFile;
1547
1609
          continue;
1548
        }
1549
14
        break;
1550
      case kSpecialRelativeOrAuthority:
1551

178
        if (ch == '/' && p[1] == '/') {
1552
162
          state = kSpecialAuthorityIgnoreSlashes;
1553
162
          p++;
1554
        } else {
1555
16
          state = kRelative;
1556
16
          continue;
1557
        }
1558
162
        break;
1559
      case kPathOrAuthority:
1560
250
        if (ch == '/') {
1561
196
          state = kAuthority;
1562
        } else {
1563
54
          state = kPath;
1564
54
          continue;
1565
        }
1566
196
        break;
1567
      case kRelative:
1568
163
        url->scheme = base->scheme;
1569
163
        if (IsSpecial(url->scheme)) {
1570
120
          url->flags |= URL_FLAGS_SPECIAL;
1571
120
          special = true;
1572
        } else {
1573
43
          url->flags &= ~URL_FLAGS_SPECIAL;
1574
43
          special = false;
1575
        }
1576

163
        switch (ch) {
1577
          case kEOL:
1578
8
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1579
2
              url->flags |= URL_FLAGS_HAS_USERNAME;
1580
2
              url->username = base->username;
1581
            }
1582
8
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1583
2
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1584
2
              url->password = base->password;
1585
            }
1586
8
            if (base->flags & URL_FLAGS_HAS_HOST) {
1587
8
              url->flags |= URL_FLAGS_HAS_HOST;
1588
8
              url->host = base->host;
1589
            }
1590
8
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1591
              url->flags |= URL_FLAGS_HAS_QUERY;
1592
              url->query = base->query;
1593
            }
1594
8
            if (base->flags & URL_FLAGS_HAS_PATH) {
1595
8
              url->flags |= URL_FLAGS_HAS_PATH;
1596
8
              url->path = base->path;
1597
            }
1598
8
            url->port = base->port;
1599
8
            break;
1600
          case '/':
1601
35
            state = kRelativeSlash;
1602
35
            break;
1603
          case '?':
1604
24
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1605
              url->flags |= URL_FLAGS_HAS_USERNAME;
1606
              url->username = base->username;
1607
            }
1608
24
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1609
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1610
              url->password = base->password;
1611
            }
1612
24
            if (base->flags & URL_FLAGS_HAS_HOST) {
1613
22
              url->flags |= URL_FLAGS_HAS_HOST;
1614
22
              url->host = base->host;
1615
            }
1616
24
            if (base->flags & URL_FLAGS_HAS_PATH) {
1617
24
              url->flags |= URL_FLAGS_HAS_PATH;
1618
24
              url->path = base->path;
1619
            }
1620
24
            url->port = base->port;
1621
24
            state = kQuery;
1622
24
            break;
1623
          case '#':
1624
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1625
              url->flags |= URL_FLAGS_HAS_USERNAME;
1626
              url->username = base->username;
1627
            }
1628
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1629
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1630
              url->password = base->password;
1631
            }
1632
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1633
16
              url->flags |= URL_FLAGS_HAS_HOST;
1634
16
              url->host = base->host;
1635
            }
1636
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1637
              url->flags |= URL_FLAGS_HAS_QUERY;
1638
              url->query = base->query;
1639
            }
1640
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1641
18
              url->flags |= URL_FLAGS_HAS_PATH;
1642
18
              url->path = base->path;
1643
            }
1644
18
            url->port = base->port;
1645
18
            state = kFragment;
1646
18
            break;
1647
          default:
1648
78
            if (special_back_slash) {
1649
4
              state = kRelativeSlash;
1650
            } else {
1651
74
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1652
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1653
1
                url->username = base->username;
1654
              }
1655
74
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1656
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1657
1
                url->password = base->password;
1658
              }
1659
74
              if (base->flags & URL_FLAGS_HAS_HOST) {
1660
68
                url->flags |= URL_FLAGS_HAS_HOST;
1661
68
                url->host = base->host;
1662
              }
1663
74
              if (base->flags & URL_FLAGS_HAS_PATH) {
1664
74
                url->flags |= URL_FLAGS_HAS_PATH;
1665
74
                url->path = base->path;
1666
74
                ShortenUrlPath(url);
1667
              }
1668
74
              url->port = base->port;
1669
74
              state = kPath;
1670
74
              continue;
1671
            }
1672
        }
1673
89
        break;
1674
      case kRelativeSlash:
1675


39
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1676
8
          state = kSpecialAuthorityIgnoreSlashes;
1677
31
        } else if (ch == '/') {
1678
3
          state = kAuthority;
1679
        } else {
1680
28
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1681
4
            url->flags |= URL_FLAGS_HAS_USERNAME;
1682
4
            url->username = base->username;
1683
          }
1684
28
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1685
2
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1686
2
            url->password = base->password;
1687
          }
1688
28
          if (base->flags & URL_FLAGS_HAS_HOST) {
1689
26
            url->flags |= URL_FLAGS_HAS_HOST;
1690
26
            url->host = base->host;
1691
          }
1692
28
          url->port = base->port;
1693
28
          state = kPath;
1694
28
          continue;
1695
        }
1696
11
        break;
1697
      case kSpecialAuthoritySlashes:
1698
1474
        state = kSpecialAuthorityIgnoreSlashes;
1699

1474
        if (ch == '/' && p[1] == '/') {
1700
1382
          p++;
1701
        } else {
1702
92
          continue;
1703
        }
1704
1382
        break;
1705
      case kSpecialAuthorityIgnoreSlashes:
1706

1690
        if (ch != '/' && ch != '\\') {
1707
1644
          state = kAuthority;
1708
1644
          continue;
1709
        }
1710
46
        break;
1711
      case kAuthority:
1712
56211
        if (ch == '@') {
1713
157
          if (atflag) {
1714
13
            buffer.reserve(buffer.size() + 3);
1715
13
            buffer.insert(0, "%40");
1716
          }
1717
157
          atflag = true;
1718
157
          size_t blen = buffer.size();
1719

157
          if (blen > 0 && buffer[0] != ':') {
1720
99
            url->flags |= URL_FLAGS_HAS_USERNAME;
1721
          }
1722
741
          for (size_t n = 0; n < blen; n++) {
1723
584
            const char bch = buffer[n];
1724
584
            if (bch == ':') {
1725
87
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1726
87
              if (!password_token_seen_flag) {
1727
85
                password_token_seen_flag = true;
1728
85
                continue;
1729
              }
1730
            }
1731
499
            if (password_token_seen_flag) {
1732
189
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1733
            } else {
1734
310
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1735
            }
1736
          }
1737
157
          buffer.clear();
1738

56054
        } else if (ch == kEOL ||
1739
54247
                   ch == '/' ||
1740
54229
                   ch == '?' ||
1741
54219
                   ch == '#' ||
1742
                   special_back_slash) {
1743

1843
          if (atflag && buffer.size() == 0) {
1744
39
            url->flags |= URL_FLAGS_FAILED;
1745
39
            return;
1746
          }
1747
1804
          p -= buffer.size() + 1;
1748
1804
          buffer.clear();
1749
1804
          state = kHost;
1750
        } else {
1751
54211
          buffer += ch;
1752
        }
1753
56172
        break;
1754
      case kHost:
1755
      case kHostname:
1756

55422
        if (has_state_override && url->scheme == "file:") {
1757
6
          state = kFileHost;
1758
6
          continue;
1759

55416
        } else if (ch == ':' && !square_bracket_flag) {
1760
568
          if (buffer.size() == 0) {
1761
19
            url->flags |= URL_FLAGS_FAILED;
1762
19
            return;
1763
          }
1764
549
          url->flags |= URL_FLAGS_HAS_HOST;
1765
549
          if (!ParseHost(buffer, &url->host, special)) {
1766
3
            url->flags |= URL_FLAGS_FAILED;
1767
3
            return;
1768
          }
1769
546
          buffer.clear();
1770
546
          state = kPort;
1771
1090
          if (state_override == kHostname) {
1772
2
            return;
1773
          }
1774

54848
        } else if (ch == kEOL ||
1775
53517
                   ch == '/' ||
1776
53495
                   ch == '?' ||
1777
53481
                   ch == '#' ||
1778
                   special_back_slash) {
1779
1377
          p--;
1780

1377
          if (special && buffer.size() == 0) {
1781
11
            url->flags |= URL_FLAGS_FAILED;
1782
11
            return;
1783
          }
1784

1485
          if (has_state_override &&
1785

1388
              buffer.size() == 0 &&
1786

50
              ((url->username.size() > 0 || url->password.size() > 0) ||
1787
16
               url->port != -1)) {
1788
4
            url->flags |= URL_FLAGS_TERMINATED;
1789
4
            return;
1790
          }
1791
1362
          url->flags |= URL_FLAGS_HAS_HOST;
1792
1362
          if (!ParseHost(buffer, &url->host, special)) {
1793
216
            url->flags |= URL_FLAGS_FAILED;
1794
216
            return;
1795
          }
1796
1146
          buffer.clear();
1797
1146
          state = kPathStart;
1798
2213
          if (has_state_override) {
1799
79
            return;
1800
          }
1801
        } else {
1802
53471
          if (ch == '[')
1803
93
            square_bracket_flag = true;
1804
53471
          if (ch == ']')
1805
90
            square_bracket_flag = false;
1806
53471
          buffer += ch;
1807
        }
1808
55082
        break;
1809
      case kPort:
1810
3127
        if (IsASCIIDigit(ch)) {
1811
2566
          buffer += ch;
1812

561
        } else if (has_state_override ||
1813
185
                   ch == kEOL ||
1814
27
                   ch == '/' ||
1815
27
                   ch == '?' ||
1816
27
                   ch == '#' ||
1817
                   special_back_slash) {
1818
534
          if (buffer.size() > 0) {
1819
529
            unsigned port = 0;
1820
            // the condition port <= 0xffff prevents integer overflow
1821

2933
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1822
2404
              port = port * 10 + buffer[i] - '0';
1823
529
            if (port > 0xffff) {
1824
              // TODO(TimothyGu): This hack is currently needed for the host
1825
              // setter since it needs access to hostname if it is valid, and
1826
              // if the FAILED flag is set the entire response to JS layer
1827
              // will be empty.
1828
18
              if (state_override == kHost)
1829
1
                url->port = -1;
1830
              else
1831
17
                url->flags |= URL_FLAGS_FAILED;
1832
18
              return;
1833
            }
1834
            // the port is valid
1835
511
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1836
511
            if (url->port == -1)
1837
29
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1838
511
            buffer.clear();
1839
5
          } else if (has_state_override) {
1840
            // TODO(TimothyGu): Similar case as above.
1841
1
            if (state_override == kHost)
1842
1
              url->port = -1;
1843
            else
1844
              url->flags |= URL_FLAGS_TERMINATED;
1845
1
            return;
1846
          }
1847
515
          state = kPathStart;
1848
515
          continue;
1849
        } else {
1850
27
          url->flags |= URL_FLAGS_FAILED;
1851
27
          return;
1852
        }
1853
2566
        break;
1854
      case kFile:
1855
130000
        url->scheme = "file:";
1856

130000
        if (ch == '/' || ch == '\\') {
1857
128488
          state = kFileSlash;
1858

1512
        } else if (has_base && base->scheme == "file:") {
1859

1505
          switch (ch) {
1860
            case kEOL:
1861
15
              if (base->flags & URL_FLAGS_HAS_HOST) {
1862
15
                url->flags |= URL_FLAGS_HAS_HOST;
1863
15
                url->host = base->host;
1864
              }
1865
15
              if (base->flags & URL_FLAGS_HAS_PATH) {
1866
15
                url->flags |= URL_FLAGS_HAS_PATH;
1867
15
                url->path = base->path;
1868
              }
1869
15
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1870
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1871
2
                url->query = base->query;
1872
              }
1873
15
              break;
1874
            case '?':
1875
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1876
2
                url->flags |= URL_FLAGS_HAS_HOST;
1877
2
                url->host = base->host;
1878
              }
1879
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1880
2
                url->flags |= URL_FLAGS_HAS_PATH;
1881
2
                url->path = base->path;
1882
              }
1883
2
              url->flags |= URL_FLAGS_HAS_QUERY;
1884
2
              url->query.clear();
1885
2
              state = kQuery;
1886
2
              break;
1887
            case '#':
1888
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1889
2
                url->flags |= URL_FLAGS_HAS_HOST;
1890
2
                url->host = base->host;
1891
              }
1892
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1893
2
                url->flags |= URL_FLAGS_HAS_PATH;
1894
2
                url->path = base->path;
1895
              }
1896
2
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1897
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1898
2
                url->query = base->query;
1899
              }
1900
2
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1901
2
              url->fragment.clear();
1902
2
              state = kFragment;
1903
2
              break;
1904
            default:
1905
1486
              if (!StartsWithWindowsDriveLetter(p, end)) {
1906
1477
                if (base->flags & URL_FLAGS_HAS_HOST) {
1907
1477
                  url->flags |= URL_FLAGS_HAS_HOST;
1908
1477
                  url->host = base->host;
1909
                }
1910
1477
                if (base->flags & URL_FLAGS_HAS_PATH) {
1911
1477
                  url->flags |= URL_FLAGS_HAS_PATH;
1912
1477
                  url->path = base->path;
1913
                }
1914
1477
                ShortenUrlPath(url);
1915
              }
1916
1486
              state = kPath;
1917
1486
              continue;
1918
          }
1919
        } else {
1920
7
          state = kPath;
1921
7
          continue;
1922
        }
1923
128507
        break;
1924
      case kFileSlash:
1925

128488
        if (ch == '/' || ch == '\\') {
1926
128387
          state = kFileHost;
1927
        } else {
1928

202
          if (has_base &&
1929

199
              base->scheme == "file:" &&
1930
98
              !StartsWithWindowsDriveLetter(p, end)) {
1931
93
            if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1932
1
              url->flags |= URL_FLAGS_HAS_PATH;
1933
1
              url->path.push_back(base->path[0]);
1934
            } else {
1935
92
              if (base->flags & URL_FLAGS_HAS_HOST) {
1936
92
                url->flags |= URL_FLAGS_HAS_HOST;
1937
92
                url->host = base->host;
1938
              } else {
1939
                url->flags &= ~URL_FLAGS_HAS_HOST;
1940
                url->host.clear();
1941
              }
1942
            }
1943
          }
1944
101
          state = kPath;
1945
101
          continue;
1946
        }
1947
128387
        break;
1948
      case kFileHost:
1949

128788
        if (ch == kEOL ||
1950
400
            ch == '/' ||
1951
395
            ch == '\\' ||
1952
395
            ch == '?' ||
1953
            ch == '#') {
1954

385173
          if (!has_state_override &&
1955

128400
              buffer.size() == 2 &&
1956
7
              IsWindowsDriveLetter(buffer)) {
1957
4
            state = kPath;
1958
128389
          } else if (buffer.size() == 0) {
1959
128325
            url->flags |= URL_FLAGS_HAS_HOST;
1960
128325
            url->host.clear();
1961
128325
            if (has_state_override)
1962
2
              return;
1963
128323
            state = kPathStart;
1964
          } else {
1965
112
            std::string host;
1966
64
            if (!ParseHost(buffer, &host, special)) {
1967
14
              url->flags |= URL_FLAGS_FAILED;
1968
14
              return;
1969
            }
1970
50
            if (host == "localhost")
1971
11
              host.clear();
1972
50
            url->flags |= URL_FLAGS_HAS_HOST;
1973
50
            url->host = host;
1974
50
            if (has_state_override)
1975
2
              return;
1976
48
            buffer.clear();
1977
48
            state = kPathStart;
1978
          }
1979
128375
          continue;
1980
        } else {
1981
395
          buffer += ch;
1982
        }
1983
395
        break;
1984
      case kPathStart:
1985
208042
        if (IsSpecial(url->scheme)) {
1986
207875
          state = kPath;
1987

207875
          if (ch != '/' && ch != '\\') {
1988
78617
            continue;
1989
          }
1990

166
        } else if (!has_state_override && ch == '?') {
1991
3
          url->flags |= URL_FLAGS_HAS_QUERY;
1992
3
          url->query.clear();
1993
3
          state = kQuery;
1994

163
        } else if (!has_state_override && ch == '#') {
1995
3
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1996
3
          url->fragment.clear();
1997
3
          state = kFragment;
1998
160
        } else if (ch != kEOL) {
1999
121
          state = kPath;
2000
121
          if (ch != '/') {
2001
6
            continue;
2002
          }
2003
        }
2004
129418
        break;
2005
      case kPath:
2006

15417494
        if (ch == kEOL ||
2007
13890213
            ch == '/' ||
2008
13890180
            special_back_slash ||
2009

19339571
            (!has_state_override && (ch == '?' || ch == '#'))) {
2010
1527434
          if (IsDoubleDotSegment(buffer)) {
2011
1180
            ShortenUrlPath(url);
2012

1180
            if (ch != '/' && !special_back_slash) {
2013
18
              url->flags |= URL_FLAGS_HAS_PATH;
2014
18
              url->path.emplace_back("");
2015
            }
2016

3052958
          } else if (IsSingleDotSegment(buffer) &&
2017

1526283
                     ch != '/' && !special_back_slash) {
2018
29
            url->flags |= URL_FLAGS_HAS_PATH;
2019
29
            url->path.emplace_back("");
2020
1526224
          } else if (!IsSingleDotSegment(buffer)) {
2021

4575262
            if (url->scheme == "file:" &&
2022
1705290
                url->path.empty() &&
2023

1707469
                buffer.size() == 2 &&
2024
36
                IsWindowsDriveLetter(buffer)) {
2025

51
              if ((url->flags & URL_FLAGS_HAS_HOST) &&
2026
16
                  !url->host.empty()) {
2027
3
                url->host.clear();
2028
3
                url->flags |= URL_FLAGS_HAS_HOST;
2029
              }
2030
35
              buffer[1] = ':';
2031
            }
2032
1525801
            url->flags |= URL_FLAGS_HAS_PATH;
2033
1525801
            url->path.emplace_back(std::move(buffer));
2034
          }
2035
1527434
          buffer.clear();
2036

1735474
          if (url->scheme == "file:" &&
2037
1317174
              (ch == kEOL ||
2038
1317162
               ch == '?' ||
2039
               ch == '#')) {
2040

258360
            while (url->path.size() > 1 && url->path[0].length() == 0) {
2041
25160
              url->path.erase(url->path.begin());
2042
            }
2043
          }
2044
3054868
          if (ch == '?') {
2045
109
            url->flags |= URL_FLAGS_HAS_QUERY;
2046
109
            state = kQuery;
2047
1527325
          } else if (ch == '#') {
2048
11
            state = kFragment;
2049
          }
2050
        } else {
2051
13890060
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
2052
        }
2053
15417492
        break;
2054
      case kCannotBeBase:
2055
18794
        switch (ch) {
2056
          case '?':
2057
2
            state = kQuery;
2058
2
            break;
2059
          case '#':
2060
5
            state = kFragment;
2061
5
            break;
2062
          default:
2063
18787
            if (url->path.size() == 0)
2064
              url->path.emplace_back("");
2065

18787
            if (url->path.size() > 0 && ch != kEOL)
2066
18119
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
2067
        }
2068
18794
        break;
2069
      case kQuery:
2070

2973
        if (ch == kEOL || (!has_state_override && ch == '#')) {
2071
255
          url->flags |= URL_FLAGS_HAS_QUERY;
2072
255
          url->query = std::move(buffer);
2073
255
          buffer.clear();
2074
510
          if (ch == '#')
2075
54
            state = kFragment;
2076
        } else {
2077
2718
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
2078
2718
                                                QUERY_ENCODE_SET_NONSPECIAL);
2079
        }
2080
2973
        break;
2081
      case kFragment:
2082
575
        switch (ch) {
2083
          case kEOL:
2084
126
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
2085
126
            url->fragment = std::move(buffer);
2086
126
            break;
2087
          case 0:
2088
2
            break;
2089
          default:
2090
447
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
2091
        }
2092
575
        break;
2093
      default:
2094
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
2095
        return;
2096
    }
2097
2098
16602728
    p++;
2099
  }
2100
}  // NOLINT(readability/fn_size)
2101
2102
namespace {
2103
160569
void SetArgs(Environment* env,
2104
             Local<Value> argv[ARG_COUNT],
2105
             const struct url_data& url) {
2106
160569
  Isolate* isolate = env->isolate();
2107
321138
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2108
160569
  argv[ARG_PROTOCOL] =
2109
160569
      url.flags & URL_FLAGS_SPECIAL ?
2110
159595
          GetSpecial(env, url.scheme) :
2111
641302
          OneByteString(isolate, url.scheme.c_str());
2112
160569
  if (url.flags & URL_FLAGS_HAS_USERNAME)
2113
176
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
2114
160569
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
2115
152
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
2116
160569
  if (url.flags & URL_FLAGS_HAS_HOST)
2117
319555
    argv[ARG_HOST] = Utf8String(isolate, url.host);
2118
160568
  if (url.flags & URL_FLAGS_HAS_QUERY)
2119
514
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
2120
160568
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
2121
244
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
2122
160568
  if (url.port > -1)
2123
986
    argv[ARG_PORT] = Integer::New(isolate, url.port);
2124
160568
  if (url.flags & URL_FLAGS_HAS_PATH)
2125
320588
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
2126
160568
}
2127
2128
162170
void Parse(Environment* env,
2129
           Local<Value> recv,
2130
           const char* input,
2131
           size_t len,
2132
           enum url_parse_state state_override,
2133
           Local<Value> base_obj,
2134
           Local<Value> context_obj,
2135
           Local<Function> cb,
2136
           Local<Value> error_cb) {
2137
162170
  Isolate* isolate = env->isolate();
2138
162170
  Local<Context> context = env->context();
2139
324323
  HandleScope handle_scope(isolate);
2140
162153
  Context::Scope context_scope(context);
2141
2142
162169
  const bool has_context = context_obj->IsObject();
2143
162169
  const bool has_base = base_obj->IsObject();
2144
2145
324322
  url_data base;
2146
324323
  url_data url;
2147
162170
  if (has_context)
2148
53314
    url = HarvestContext(env, context_obj.As<Object>());
2149
162169
  if (has_base)
2150
2574
    base = HarvestBase(env, base_obj.As<Object>());
2151
2152
162169
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
2153

162170
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
2154
53314
      ((state_override != kUnknownState) &&
2155
53314
       (url.flags & URL_FLAGS_TERMINATED)))
2156
17
    return;
2157
2158
  // Define the return value placeholders
2159
  const Local<Value> undef = Undefined(isolate);
2160
  const Local<Value> null = Null(isolate);
2161
162153
  if (!(url.flags & URL_FLAGS_FAILED)) {
2162
    Local<Value> argv[] = {
2163
      undef,
2164
      undef,
2165
      undef,
2166
      undef,
2167
      null,  // host defaults to null
2168
      null,  // port defaults to null
2169
      undef,
2170
      null,  // query defaults to null
2171
      null,  // fragment defaults to null
2172
160569
    };
2173
160569
    SetArgs(env, argv, url);
2174
481704
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
2175
1584
  } else if (error_cb->IsFunction()) {
2176
1537
    Local<Value> argv[2] = { undef, undef };
2177
3074
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2178
1537
    argv[ERR_ARG_INPUT] =
2179
3074
      String::NewFromUtf8(env->isolate(),
2180
                          input,
2181
3074
                          NewStringType::kNormal).ToLocalChecked();
2182
6148
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
2183
1537
        .FromMaybe(Local<Value>());
2184
  }
2185
}
2186
2187
162169
void Parse(const FunctionCallbackInfo<Value>& args) {
2188
162169
  Environment* env = Environment::GetCurrent(args);
2189
162171
  CHECK_GE(args.Length(), 5);
2190
486513
  CHECK(args[0]->IsString());  // input
2191


659325
  CHECK(args[2]->IsUndefined() ||  // base context
2192
        args[2]->IsNull() ||
2193
        args[2]->IsObject());
2194


753083
  CHECK(args[3]->IsUndefined() ||  // context
2195
        args[3]->IsNull() ||
2196
        args[3]->IsObject());
2197
324340
  CHECK(args[4]->IsFunction());  // complete callback
2198

704219
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
2199
2200
324338
  Utf8Value input(env->isolate(), args[0]);
2201
162170
  enum url_parse_state state_override = kUnknownState;
2202
324340
  if (args[1]->IsNumber()) {
2203
162170
    state_override = static_cast<enum url_parse_state>(
2204
810850
        args[1]->Uint32Value(env->context()).FromJust());
2205
  }
2206
2207
324338
  Parse(env, args.This(),
2208
162170
        *input, input.length(),
2209
        state_override,
2210
        args[2],
2211
        args[3],
2212
324338
        args[4].As<Function>(),
2213
162170
        args[5]);
2214
162170
}
2215
2216
22
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
2217
22
  Environment* env = Environment::GetCurrent(args);
2218
22
  CHECK_GE(args.Length(), 1);
2219
66
  CHECK(args[0]->IsString());
2220
44
  Utf8Value value(env->isolate(), args[0]);
2221
44
  std::string output;
2222
22
  size_t len = value.length();
2223
22
  output.reserve(len);
2224
233
  for (size_t n = 0; n < len; n++) {
2225
211
    const char ch = (*value)[n];
2226
211
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
2227
  }
2228
44
  args.GetReturnValue().Set(
2229
44
      String::NewFromUtf8(env->isolate(),
2230
                          output.c_str(),
2231
22
                          NewStringType::kNormal).ToLocalChecked());
2232
22
}
2233
2234
11
void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2235
11
  Environment* env = Environment::GetCurrent(args);
2236
11
  CHECK_GE(args.Length(), 2);
2237
33
  CHECK(args[0]->IsString());
2238
22
  CHECK(args[1]->IsNumber());
2239
2240
22
  TwoByteValue value(env->isolate(), args[0]);
2241
2242
44
  int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2243
11
  CHECK_GE(start, 0);
2244
2245
32
  for (size_t i = start; i < value.length(); i++) {
2246
21
    char16_t c = value[i];
2247
21
    if (!IsUnicodeSurrogate(c)) {
2248
8
      continue;
2249

13
    } else if (IsUnicodeSurrogateTrail(c) || i == value.length() - 1) {
2250
12
      value[i] = kUnicodeReplacementCharacter;
2251
    } else {
2252
1
      char16_t d = value[i + 1];
2253
1
      if (IsUnicodeTrail(d)) {
2254
        i++;
2255
      } else {
2256
1
        value[i] = kUnicodeReplacementCharacter;
2257
      }
2258
    }
2259
  }
2260
2261
22
  args.GetReturnValue().Set(
2262
22
      String::NewFromTwoByte(env->isolate(),
2263
11
                             *value,
2264
                             NewStringType::kNormal,
2265
22
                             value.length()).ToLocalChecked());
2266
11
}
2267
2268
222
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2269
222
  Environment* env = Environment::GetCurrent(args);
2270
222
  CHECK_GE(args.Length(), 1);
2271
666
  CHECK(args[0]->IsString());
2272
435
  Utf8Value value(env->isolate(), args[0]);
2273
2274
435
  URLHost host;
2275
  // Assuming the host is used for a special scheme.
2276
222
  host.ParseHost(*value, value.length(), true);
2277
222
  if (host.ParsingFailed()) {
2278
27
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2279
9
    return;
2280
  }
2281
426
  std::string out = host.ToStringMove();
2282
426
  args.GetReturnValue().Set(
2283
426
      String::NewFromUtf8(env->isolate(),
2284
                          out.c_str(),
2285
213
                          NewStringType::kNormal).ToLocalChecked());
2286
}
2287
2288
202
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2289
202
  Environment* env = Environment::GetCurrent(args);
2290
202
  CHECK_GE(args.Length(), 1);
2291
606
  CHECK(args[0]->IsString());
2292
395
  Utf8Value value(env->isolate(), args[0]);
2293
2294
395
  URLHost host;
2295
  // Assuming the host is used for a special scheme.
2296
202
  host.ParseHost(*value, value.length(), true, true);
2297
202
  if (host.ParsingFailed()) {
2298
27
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2299
9
    return;
2300
  }
2301
386
  std::string out = host.ToStringMove();
2302
386
  args.GetReturnValue().Set(
2303
386
      String::NewFromUtf8(env->isolate(),
2304
                          out.c_str(),
2305
193
                          NewStringType::kNormal).ToLocalChecked());
2306
}
2307
2308
4594
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2309
4594
  Environment* env = Environment::GetCurrent(args);
2310
4594
  CHECK_EQ(args.Length(), 1);
2311
9188
  CHECK(args[0]->IsFunction());
2312
9188
  env->set_url_constructor_function(args[0].As<Function>());
2313
4594
}
2314
2315
4594
void Initialize(Local<Object> target,
2316
                Local<Value> unused,
2317
                Local<Context> context,
2318
                void* priv) {
2319
4594
  Environment* env = Environment::GetCurrent(context);
2320
4594
  env->SetMethod(target, "parse", Parse);
2321
4594
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
2322
4594
  env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString);
2323
4594
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
2324
4594
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
2325
4594
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
2326
2327
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
2328
238888
  FLAGS(XX)
2329
#undef XX
2330
2331
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
2332
385896
  PARSESTATES(XX)
2333
#undef XX
2334
4594
}
2335
}  // namespace
2336
2337
8
std::string URL::ToFilePath() const {
2338
8
  if (context_.scheme != "file:") {
2339
1
    return "";
2340
  }
2341
2342
#ifdef _WIN32
2343
  const char* slash = "\\";
2344
  auto is_slash = [] (char ch) {
2345
    return ch == '/' || ch == '\\';
2346
  };
2347
#else
2348
7
  const char* slash = "/";
2349
46
  auto is_slash = [] (char ch) {
2350
    return ch == '/';
2351
46
  };
2352

14
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2353
7
      context_.host.length() > 0) {
2354
1
    return "";
2355
  }
2356
#endif
2357
12
  std::string decoded_path;
2358
18
  for (const std::string& part : context_.path) {
2359
25
    std::string decoded = PercentDecode(part.c_str(), part.length());
2360
58
    for (char& ch : decoded) {
2361
46
      if (is_slash(ch)) {
2362
1
        return "";
2363
      }
2364
    }
2365
12
    decoded_path += slash + decoded;
2366
  }
2367
2368
#ifdef _WIN32
2369
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2370
2371
  // If hostname is set, then we have a UNC path. Pass the hostname through
2372
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
2373
  // need to worry about percent encoding because the URL parser will have
2374
  // already taken care of that for us. Note that this only causes IDNs with an
2375
  // appropriate `xn--` prefix to be decoded.
2376
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2377
      context_.host.length() > 0) {
2378
    std::string unicode_host;
2379
    if (!ToUnicode(context_.host, &unicode_host)) {
2380
      return "";
2381
    }
2382
    return "\\\\" + unicode_host + decoded_path;
2383
  }
2384
  // Otherwise, it's a local path that requires a drive letter.
2385
  if (decoded_path.length() < 3) {
2386
    return "";
2387
  }
2388
  if (decoded_path[2] != ':' ||
2389
      !IsASCIIAlpha(decoded_path[1])) {
2390
    return "";
2391
  }
2392
  // Strip out the leading '\'.
2393
  return decoded_path.substr(1);
2394
#else
2395
5
  return decoded_path;
2396
#endif
2397
}
2398
2399
25099
// Builds a "file://" URL from a file system path. Every '%' in `file_path`
// is rewritten to "%25" first; the result is then parsed into the URL's
// context starting from the kPathStart state.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped;
  for (const char ch : file_path) {
    escaped += ch;
    if (ch == '%') {
      // '%' has already been appended; adding "25" yields "%25".
      escaped += "25";
    }
  }

  URL::Parse(escaped.c_str(),
             escaped.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
2411
2412
// This function works by calling out to a JS function that creates and
2413
// returns the JS URL object. Be mindful of the JS<->Native boundary
2414
// crossing that is required.
2415
MaybeLocal<Value> URL::ToObject(Environment* env) const {
2416
  Isolate* isolate = env->isolate();
2417
  Local<Context> context = env->context();
2418
  Context::Scope context_scope(context);
2419
2420
  const Local<Value> undef = Undefined(isolate);
2421
  const Local<Value> null = Null(isolate);
2422
2423
  if (context_.flags & URL_FLAGS_FAILED)
2424
    return Local<Value>();
2425
2426
  Local<Value> argv[] = {
2427
    undef,
2428
    undef,
2429
    undef,
2430
    undef,
2431
    null,  // host defaults to null
2432
    null,  // port defaults to null
2433
    undef,
2434
    null,  // query defaults to null
2435
    null,  // fragment defaults to null
2436
  };
2437
  SetArgs(env, argv, context_);
2438
2439
  MaybeLocal<Value> ret;
2440
  {
2441
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
2442
2443
    // The SetURLConstructor method must have been called already to
2444
    // set the constructor function used below. SetURLConstructor is
2445
    // called automatically when the internal/url.js module is loaded
2446
    // during the internal/bootstrap/node.js processing.
2447
    ret = env->url_constructor_function()
2448
        ->Call(env->context(), undef, arraysize(argv), argv);
2449
  }
2450
2451
  return ret;
2452
}
2453
2454
}  // namespace url
2455
}  // namespace node
2456
2457
4325
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)