GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/node_url.cc Lines: 1172 1206 97.2 %
Date: 2020-02-19 22:14:06 Branches: 1074 1196 89.8 %

Line Branch Exec Source
1
#include "node_url.h"
2
#include "base_object-inl.h"
3
#include "node_errors.h"
4
#include "node_i18n.h"
5
#include "util-inl.h"
6
7
#include <cmath>
8
#include <cstdio>
9
#include <string>
10
#include <vector>
11
12
namespace node {
13
14
using errors::TryCatchScope;
15
16
using v8::Array;
17
using v8::Context;
18
using v8::Function;
19
using v8::FunctionCallbackInfo;
20
using v8::HandleScope;
21
using v8::Int32;
22
using v8::Integer;
23
using v8::Isolate;
24
using v8::Local;
25
using v8::MaybeLocal;
26
using v8::NewStringType;
27
using v8::Null;
28
using v8::Object;
29
using v8::String;
30
using v8::Undefined;
31
using v8::Value;
32
33
157909
// Builds a V8 string from the UTF-8 bytes held in `str`.
// The explicit length is passed through, so `str` is not required to be
// NUL-terminated at that length. ToLocalChecked() is used on the result, so
// an empty MaybeLocal is not reported to the caller as a recoverable error.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  MaybeLocal<String> maybe_string = String::NewFromUtf8(
      isolate, str.data(), NewStringType::kNormal, str.length());
  return maybe_string.ToLocalChecked();
}
39
40
namespace url {
41
42
namespace {
43
44
// Sentinel standing in for the URL spec's "EOF code point":
// https://url.spec.whatwg.org/#eof-code-point
constexpr char kEOL = static_cast<char>(-1);

// U+FFFD REPLACEMENT CHARACTER. Used in ToUSVString().
constexpr char16_t kUnicodeReplacementCharacter = u'\uFFFD';
49
50
// https://url.spec.whatwg.org/#concept-host
51
2299
// Tagged union holding one parsed host: a domain or opaque host (string),
// an IPv4 address, an IPv6 address, or nothing at all (H_FAILED).
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True whenever the stored tag is H_FAILED, which is also the initial state
  // and the state left behind by Reset().
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage for whichever representation `type_` currently selects.
  // The union never constructs or destroys the string member on its own;
  // URLHost does that manually in Reset() and EmplaceString().
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    Value() : ipv4(0) {}
    ~Value() {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the active one, then marks the host
  // as H_FAILED. The numeric members need no cleanup.
  void Reset() {
    const bool holds_string =
        type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
    if (holds_string)
      value_.domain_or_opaque.~basic_string();
    type_ = HostType::H_FAILED;
  }

  // Assigning to the string member of the union with `=` is brittle: it is
  // only valid if a string object already lives there. Instead, drop whatever
  // is currently stored and construct a fresh string in place.
  // (Not keeping a string inside a union at all would be a cleaner design.)
  void EmplaceString(HostType type, std::string&& string) {
    Reset();
    type_ = type;
    new(&value_.domain_or_opaque) std::string(std::move(string));
  }

  void SetOpaque(std::string&& string) {
    EmplaceString(HostType::H_OPAQUE, std::move(string));
  }

  void SetDomain(std::string&& string) {
    EmplaceString(HostType::H_DOMAIN, std::move(string));
  }
};
122
123
4598
// Releases whatever value is currently stored by delegating to Reset().
URLHost::~URLHost() { Reset(); }
126
127
// X-macro listing the enumerators of url_cb_args, in order.
// ARG_COUNT doubles as the number of real entries, so it must stay last.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)                                                            \
  XX(ARG_COUNT)  // This one has to be last.

// X-macro listing the enumerators of url_error_cb_args, in order.
// The final entry deliberately has no line continuation, so the macro ends
// here regardless of what follows it in the file.
#define ERR_ARGS(XX)                                                          \
  XX(ERR_ARG_FLAGS)                                                           \
  XX(ERR_ARG_INPUT)

// Enumerators generated from ARGS; values start at 0 and follow list order.
enum url_cb_args {
#define XX(name) name,
  ARGS(XX)
#undef XX
};

// Enumerators generated from ERR_ARGS; values start at 0 and follow list
// order.
enum url_error_cb_args {
#define XX(name) name,
  ERR_ARGS(XX)
#undef XX
};
154
155
// Defines `template <typename T> bool name(const T ch)` returning `expr`,
// which may refer to `ch`. Instantiation fails to compile if T is narrower
// than `bits` bits (bits / 8 bytes).
#define CHAR_TEST(bits, name, expr)                                           \
  template <typename T>                                                       \
  bool name(const T ch) {                                                     \
    static_assert(sizeof(ch) >= (bits) / 8,                                   \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }

// Defines two overloads of `name`:
//  - name(ch1, ch2): returns `expr`, which may refer to `ch1` and `ch2`;
//  - name(str): applies the first overload to the first two characters of a
//    std::basic_string, and is false for strings shorter than two characters.
// The same minimum-width check as CHAR_TEST applies to both.
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  bool name(const T ch1, const T ch2) {                                       \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  bool name(const std::basic_string<T>& str) {                                \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be at least " #bits " bits wide");          \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               (ch >= 'A' && ch <= 'F') ||
                               (ch >= 'a' && ch <= 'f')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
                            (ch >= 'a' && ch <= 'z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))

// https://infra.spec.whatwg.org/#ascii-lowercase
// Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z' and leaves 'a'-'z' unchanged;
// non-alphabetic input is returned as is.
template <typename T>
T ASCIILowercase(T ch) {
  return IsASCIIAlpha(ch) ? (ch | 0x20) : ch;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
          ch == '\\' || ch == ']')

// https://url.spec.whatwg.org/#windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && ch2 == ':'))

// If a UTF-16 character is a low/trailing surrogate.
CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00)

// If a UTF-16 character is a surrogate.
CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800)

// If a UTF-16 surrogate is a low/trailing one.
// Only bit 0x400 is inspected, so the argument must already be known to be a
// surrogate for the answer to be meaningful.
CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)

#undef CHAR_TEST
#undef TWO_CHAR_STRING_TEST
230
231
// Percent-encoded spelling of every byte value: hex[b] is "%XX", where XX is
// b written as two uppercase hexadecimal digits.
// Both the strings and the table entries are const, so the lookup table
// cannot be modified at run time.
const char* const hex[256] = {
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
265
266
// 256-bit membership bitmap, one bit per byte value: byte c is in the set
// when bit (1 << (c & 7)) of entry [c >> 3] is set.
// Members: 0x00-0x1F, 0x7F and 0x80-0xFF.
const uint8_t C0_CONTROL_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x00,  // 20-27
  0x00,  // 28-2F
  0x00,  // 30-37
  0x00,  // 38-3F
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x00,  // 60-67
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 7F (DEL)
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
332
333
// 256-bit membership bitmap, one bit per byte value: byte c is in the set
// when bit (1 << (c & 7)) of entry [c >> 3] is set.
// Members: 0x00-0x1F, 0x7F, 0x80-0xFF, plus space, '"', '<', '>' and '`'.
const uint8_t FRAGMENT_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x05,  // 20-27: 20 (space), 22 (")
  0x00,  // 28-2F
  0x00,  // 30-37
  0x50,  // 38-3F: 3C (<), 3E (>)
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x01,  // 60-67: 60 (`)
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 7F (DEL)
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
399
400
401
// 256-bit membership bitmap, one bit per byte value: byte c is in the set
// when bit (1 << (c & 7)) of entry [c >> 3] is set.
// Members: 0x00-0x1F, 0x7F, 0x80-0xFF, plus space, '"', '#', '<', '>', '?',
// '`', '{' and '}'.
const uint8_t PATH_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x0D,  // 20-27: 20 (space), 22 ("), 23 (#)
  0x00,  // 28-2F
  0x00,  // 30-37
  0xD0,  // 38-3F: 3C (<), 3E (>), 3F (?)
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x01,  // 60-67: 60 (`)
  0x00,  // 68-6F
  0x00,  // 70-77
  0xA8,  // 78-7F: 7B ({), 7D (}), 7F (DEL)
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
467
468
// 256-bit membership bitmap, one bit per byte value: byte c is in the set
// when bit (1 << (c & 7)) of entry [c >> 3] is set.
// Members: 0x00-0x1F, 0x7F, 0x80-0xFF, plus space, '"', '#', '/', ':', ';',
// '<', '=', '>', '?', '@', '[', backslash, ']', '^', '`', '{', '|' and '}'.
const uint8_t USERINFO_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x0D,  // 20-27: 20 (space), 22 ("), 23 (#)
  0x80,  // 28-2F: 2F (/)
  0x00,  // 30-37
  0xFC,  // 38-3F: 3A (:), 3B (;), 3C (<), 3D (=), 3E (>), 3F (?)
  0x01,  // 40-47: 40 (@)
  0x00,  // 48-4F
  0x00,  // 50-57
  0x78,  // 58-5F: 5B ([), 5C (backslash), 5D (]), 5E (^)
  0x01,  // 60-67: 60 (`)
  0x00,  // 68-6F
  0x00,  // 70-77
  0xB8,  // 78-7F: 7B ({), 7C (|), 7D (}), 7F (DEL)
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
534
535
// 256-bit membership bitmap, one bit per byte value: byte c is in the set
// when bit (1 << (c & 7)) of entry [c >> 3] is set.
// Members: 0x00-0x1F, 0x7F, 0x80-0xFF, plus space, '"', '#', '<' and '>'.
const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x0D,  // 20-27: 20 (space), 22 ("), 23 (#)
  0x00,  // 28-2F
  0x00,  // 30-37
  0x50,  // 38-3F: 3C (<), 3E (>)
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x00,  // 60-67
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 7F (DEL)
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
601
602
// Same as QUERY_ENCODE_SET_NONSPECIAL, but with 0x27 (') encoded.
//
// One bit per byte value 0x00..0xFF, eight values per table entry, laid out
// the way BitAt() reads it: bit (n & 7) of entry (n >> 3) belongs to value n.
// A set bit marks a value that AppendOrEscape() percent-encodes when it is
// handed this table.
const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = {
  0xFF,  // 00-07: all set
  0xFF,  // 08-0F: all set
  0xFF,  // 10-17: all set
  0xFF,  // 18-1F: all set
  0x8D,  // 20-27: 0x20 (space), 0x22 ("), 0x23 (#), 0x27 (')
  0x00,  // 28-2F
  0x00,  // 30-37
  0x50,  // 38-3F: 0x3C (<), 0x3E (>)
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x00,  // 60-67
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 0x7F
  0xFF,  // 80-87: all set
  0xFF,  // 88-8F: all set
  0xFF,  // 90-97: all set
  0xFF,  // 98-9F: all set
  0xFF,  // A0-A7: all set
  0xFF,  // A8-AF: all set
  0xFF,  // B0-B7: all set
  0xFF,  // B8-BF: all set
  0xFF,  // C0-C7: all set
  0xFF,  // C8-CF: all set
  0xFF,  // D0-D7: all set
  0xFF,  // D8-DF: all set
  0xFF,  // E0-E7: all set
  0xFF,  // E8-EF: all set
  0xFF,  // F0-F7: all set
  0xFF   // F8-FF: all set
};
669
670
13793058
// Tests bit number i of the bit set stored in a: the bit lives in byte
// i / 8, at position i % 8 counted from the least significant bit.
bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t containing_byte = a[i / 8];
  const unsigned mask = 1u << (i % 8);
  return (containing_byte & mask) != 0;
}
673
674
// Appends ch to str. If ch position in encode_set is set, the ch will
675
// be percent-encoded then appended.
676
13793059
void AppendOrEscape(std::string* str,
677
                           const unsigned char ch,
678
                           const uint8_t encode_set[]) {
679
13793059
  if (BitAt(encode_set, ch))
680
714
    *str += hex[ch];
681
  else
682
13792345
    *str += ch;
683
13793058
}
684
685
// Converts one ASCII hexadecimal digit to its numeric value (0-15).
// Any other input yields static_cast<unsigned>(-1).
template <typename T>
unsigned hex2bin(const T ch) {
  const bool is_decimal = ch >= '0' && ch <= '9';
  const bool is_upper_hex = ch >= 'A' && ch <= 'F';
  const bool is_lower_hex = ch >= 'a' && ch <= 'f';

  if (is_decimal) {
    return ch - '0';
  } else if (is_upper_hex) {
    return 10 + (ch - 'A');
  } else if (is_lower_hex) {
    return 10 + (ch - 'a');
  }
  return static_cast<unsigned>(-1);
}
695
696
12885
std::string PercentDecode(const char* input, size_t len) {
697
12885
  std::string dest;
698
12885
  if (len == 0)
699
6
    return dest;
700
12879
  dest.reserve(len);
701
12879
  const char* pointer = input;
702
12879
  const char* end = input + len;
703
704
361187
  while (pointer < end) {
705
174154
    const char ch = pointer[0];
706
174154
    size_t remaining = end - pointer - 1;
707


348117
    if (ch != '%' || remaining < 2 ||
708
200
        (ch == '%' &&
709
397
         (!IsASCIIHexDigit(pointer[1]) ||
710
197
          !IsASCIIHexDigit(pointer[2])))) {
711
173963
      dest += ch;
712
173963
      pointer++;
713
173963
      continue;
714
    } else {
715
191
      unsigned a = hex2bin(pointer[1]);
716
191
      unsigned b = hex2bin(pointer[2]);
717
191
      char c = static_cast<char>(a * 16 + b);
718
191
      dest += c;
719
191
      pointer += 3;
720
    }
721
  }
722
12879
  return dest;
723
}
724
725
// X-macro table of the schemes treated as "special". Each entry is
// XX(key, port, name):
//   key  - identifier pasted into env->url_special_<key>_string() by
//          GetSpecial().
//   port - port that NormalizePort() maps to -1 for that scheme ("file:"
//          lists -1 here).
//   name - the scheme string, including the trailing ':', that IsSpecial(),
//          GetSpecial() and NormalizePort() compare against.
#define SPECIALS(XX)                                                          \
726
  XX(ftp, 21, "ftp:")                                                         \
727
  XX(file, -1, "file:")                                                       \
728
  XX(gopher, 70, "gopher:")                                                   \
729
  XX(http, 80, "http:")                                                       \
730
  XX(https, 443, "https:")                                                    \
731
  XX(ws, 80, "ws:")                                                           \
732
  XX(wss, 443, "wss:")
733
734
335464
bool IsSpecial(const std::string& scheme) {
735
#define V(_, __, name) if (scheme == name) return true;
736



335464
  SPECIALS(V);
737
#undef V
738
1099
  return false;
739
}
740
741
157196
// Returns the Environment's url_special_<key>_string() for the SPECIALS
// entry whose name equals scheme. Hits UNREACHABLE() when scheme is not in
// the table, so callers must pass a special scheme.
Local<String> GetSpecial(Environment* env, const std::string& scheme) {
#define RETURN_IF_MATCH(key, unused_port, scheme_name)                        \
  if (scheme == scheme_name)                                                  \
    return env->url_special_##key##_string();
  SPECIALS(RETURN_IF_MATCH)
#undef RETURN_IF_MATCH
  UNREACHABLE();
}
748
749
129326
int NormalizePort(const std::string& scheme, int p) {
750
#define V(_, port, name) if (scheme == name && p == port) return -1;
751










129326
  SPECIALS(V);
752
#undef V
753
2932
  return p;
754
}
755
756
// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
757
1302
bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
758
1302
  size_t length = end - p;
759
1283
  return length >= 2 &&
760

1331
    IsWindowsDriveLetter(p[0], p[1]) &&
761
14
    (length == 2 ||
762
21
      p[2] == '/' ||
763
10
      p[2] == '\\' ||
764
5
      p[2] == '?' ||
765
1304
      p[2] == '#');
766
}
767
768
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs input through i18n::ToUnicode. On success stores the converted
// bytes in *output and returns true; on a negative result returns false and
// leaves *output untouched.
bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool ok =
      i18n::ToUnicode(&converted, input.c_str(), input.length()) >= 0;
  if (ok)
    output->assign(*converted, converted.length());
  return ok;
}

// Runs input through i18n::ToASCII. On success stores the converted bytes
// in *output and returns true; on a negative result returns false and
// leaves *output untouched.
bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool ok =
      i18n::ToASCII(&converted, input.c_str(), input.length()) >= 0;
  if (ok)
    output->assign(*converted, converted.length());
  return ok;
}
#else
// Intentional non-ops if ICU is not present: the input is copied through
// unchanged and the call always succeeds.
bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}

bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
#endif
796
797
91
void URLHost::ParseIPv6Host(const char* input, size_t length) {
798
91
  CHECK_EQ(type_, HostType::H_FAILED);
799
91
  unsigned size = arraysize(value_.ipv6);
800
819
  for (unsigned n = 0; n < size; n++)
801
728
    value_.ipv6[n] = 0;
802
91
  uint16_t* piece_pointer = &value_.ipv6[0];
803
91
  uint16_t* const buffer_end = piece_pointer + size;
804
91
  uint16_t* compress_pointer = nullptr;
805
91
  const char* pointer = input;
806
91
  const char* end = pointer + length;
807
  unsigned value, len, numbers_seen;
808
91
  char ch = pointer < end ? pointer[0] : kEOL;
809
91
  if (ch == ':') {
810

33
    if (length < 2 || pointer[1] != ':')
811
3
      return;
812
30
    pointer += 2;
813
30
    ch = pointer < end ? pointer[0] : kEOL;
814
30
    piece_pointer++;
815
30
    compress_pointer = piece_pointer;
816
  }
817
404
  while (ch != kEOL) {
818
219
    if (piece_pointer >= buffer_end)
819
3
      return;
820
216
    if (ch == ':') {
821
17
      if (compress_pointer != nullptr)
822
3
        return;
823
14
      pointer++;
824
14
      ch = pointer < end ? pointer[0] : kEOL;
825
14
      piece_pointer++;
826
14
      compress_pointer = piece_pointer;
827
14
      continue;
828
    }
829
199
    value = 0;
830
199
    len = 0;
831

623
    while (len < 4 && IsASCIIHexDigit(ch)) {
832
212
      value = value * 0x10 + hex2bin(ch);
833
212
      pointer++;
834
212
      ch = pointer < end ? pointer[0] : kEOL;
835
212
      len++;
836
    }
837

199
    switch (ch) {
838
      case '.':
839
43
        if (len == 0)
840
3
          return;
841
40
        pointer -= len;
842
40
        ch = pointer < end ? pointer[0] : kEOL;
843
40
        if (piece_pointer > buffer_end - 2)
844
3
          return;
845
37
        numbers_seen = 0;
846
223
        while (ch != kEOL) {
847
123
          value = 0xffffffff;
848
123
          if (numbers_seen > 0) {
849

86
            if (ch == '.' && numbers_seen < 4) {
850
78
              pointer++;
851
78
              ch = pointer < end ? pointer[0] : kEOL;
852
            } else {
853
8
              return;
854
            }
855
          }
856
115
          if (!IsASCIIDigit(ch))
857
16
            return;
858
343
          while (IsASCIIDigit(ch)) {
859
128
            unsigned number = ch - '0';
860
128
            if (value == 0xffffffff) {
861
99
              value = number;
862
29
            } else if (value == 0) {
863
3
              return;
864
            } else {
865
26
              value = value * 10 + number;
866
            }
867
125
            if (value > 255)
868
3
              return;
869
122
            pointer++;
870
122
            ch = pointer < end ? pointer[0] : kEOL;
871
          }
872
93
          *piece_pointer = *piece_pointer * 0x100 + value;
873
93
          numbers_seen++;
874

93
          if (numbers_seen == 2 || numbers_seen == 4)
875
37
            piece_pointer++;
876
        }
877
7
        if (numbers_seen != 4)
878
3
          return;
879
4
        continue;
880
      case ':':
881
125
        pointer++;
882
125
        ch = pointer < end ? pointer[0] : kEOL;
883
125
        if (ch == kEOL)
884
3
          return;
885
122
        break;
886
      case kEOL:
887
18
        break;
888
      default:
889
13
        return;
890
    }
891
140
    *piece_pointer = value;
892
140
    piece_pointer++;
893
  }
894
895
27
  if (compress_pointer != nullptr) {
896
18
    unsigned swaps = piece_pointer - compress_pointer;
897
18
    piece_pointer = buffer_end - 1;
898

58
    while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
899
20
      uint16_t temp = *piece_pointer;
900
20
      uint16_t* swap_piece = compress_pointer + swaps - 1;
901
20
      *piece_pointer = *swap_piece;
902
20
      *swap_piece = temp;
903
20
       piece_pointer--;
904
20
       swaps--;
905
    }
906

9
  } else if (compress_pointer == nullptr &&
907
             piece_pointer != buffer_end) {
908
3
    return;
909
  }
910
24
  type_ = HostType::H_IPV6;
911
}
912
913
2071
// Parses the characters in [start, end) as one component of a dotted IPv4
// host.
//
// The radix is chosen from the prefix: "0x"/"0X" selects hexadecimal, any
// other leading '0' followed by more characters selects octal, everything
// else is decimal. An empty range, or a bare "0x", yields 0.
//
// Returns the parsed value, -1 when any character is not a digit of the
// selected radix, or INT64_MAX when the value does not fit in an int64_t.
//
// Only bytes inside [start, end) are read; the byte at `end` is never
// inspected, so the range does not need to be followed by a terminator.
int64_t ParseNumber(const char* start, const char* end) {
  int64_t radix = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    radix = 16;
  }
  if (start == end)
    return 0;
  if (radix == 10 && end - start > 1 && start[0] == '0') {
    start++;
    radix = 8;
  }

  int64_t result = 0;
  bool overflowed = false;
  for (const char* p = start; p < end; p++) {
    const char ch = *p;
    int64_t digit;
    if (ch >= '0' && ch <= '9') {
      digit = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
      digit = 10 + (ch - 'a');
    } else if (ch >= 'A' && ch <= 'F') {
      digit = 10 + (ch - 'A');
    } else {
      return -1;
    }
    if (digit >= radix)
      return -1;

    // Keep validating the remaining characters after an overflow so that an
    // invalid character still takes precedence over saturation.
    if (overflowed)
      continue;
    if (result > (INT64_MAX - digit) / radix) {
      overflowed = true;
    } else {
      result = result * radix + digit;
    }
  }
  return overflowed ? INT64_MAX : result;
}
947
948
1936
void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
949
1936
  CHECK_EQ(type_, HostType::H_FAILED);
950
1936
  *is_ipv4 = false;
951
1936
  const char* pointer = input;
952
1936
  const char* mark = input;
953
1936
  const char* end = pointer + length;
954
1936
  int parts = 0;
955
1936
  uint32_t val = 0;
956
  uint64_t numbers[4];
957
1936
  int tooBigNumbers = 0;
958
1936
  if (length == 0)
959
1884
    return;
960
961
37738
  while (pointer <= end) {
962
19766
    const char ch = pointer < end ? pointer[0] : kEOL;
963
19766
    int remaining = end - pointer - 1;
964

19766
    if (ch == '.' || ch == kEOL) {
965
2079
      if (++parts > static_cast<int>(arraysize(numbers)))
966
2
        return;
967
2077
      if (pointer == mark)
968
6
        return;
969
2071
      int64_t n = ParseNumber(mark, pointer);
970
2071
      if (n < 0)
971
1855
        return;
972
973
216
      if (n > 255) {
974
69
        tooBigNumbers++;
975
      }
976
216
      numbers[parts - 1] = n;
977
216
      mark = pointer + 1;
978

216
      if (ch == '.' && remaining == 0)
979
2
        break;
980
    }
981
17901
    pointer++;
982
  }
983
73
  CHECK_GT(parts, 0);
984
73
  *is_ipv4 = true;
985
986
  // If any but the last item in numbers is greater than 255, return failure.
987
  // If the last item in numbers is greater than or equal to
988
  // 256^(5 - the number of items in numbers), return failure.
989

143
  if (tooBigNumbers > 1 ||
990

181
      (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
991
67
      numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
992
21
    return;
993
  }
994
995
52
  type_ = HostType::H_IPV4;
996
52
  val = numbers[parts - 1];
997
137
  for (int n = 0; n < parts - 1; n++) {
998
85
    double b = 3 - n;
999
85
    val += numbers[n] * pow(256, b);
1000
  }
1001
1002
52
  value_.ipv4 = val;
1003
}
1004
1005
147
void URLHost::ParseOpaqueHost(const char* input, size_t length) {
1006
147
  CHECK_EQ(type_, HostType::H_FAILED);
1007
274
  std::string output;
1008
147
  output.reserve(length);
1009
927
  for (size_t i = 0; i < length; i++) {
1010
800
    const char ch = input[i];
1011

800
    if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
1012
20
      return;
1013
    } else {
1014
780
      AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
1015
    }
1016
  }
1017
1018
127
  SetOpaque(std::move(output));
1019
}
1020
1021
2299
void URLHost::ParseHost(const char* input,
1022
                        size_t length,
1023
                        bool is_special,
1024
                        bool unicode) {
1025
2299
  CHECK_EQ(type_, HostType::H_FAILED);
1026
2299
  const char* pointer = input;
1027
1028
2299
  if (length == 0)
1029
436
    return;
1030
1031
2299
  if (pointer[0] == '[') {
1032
97
    if (pointer[length - 1] != ']')
1033
6
      return;
1034
91
    return ParseIPv6Host(++pointer, length - 2);
1035
  }
1036
1037
2202
  if (!is_special)
1038
147
    return ParseOpaqueHost(input, length);
1039
1040
  // First, we have to percent decode
1041
3918
  std::string decoded = PercentDecode(input, length);
1042
1043
  // Then we have to punycode toASCII
1044
2055
  if (!ToASCII(decoded, &decoded))
1045
69
    return;
1046
1047
  // If any of the following characters are still present, we have to fail
1048
66569
  for (size_t n = 0; n < decoded.size(); n++) {
1049
64633
    const char ch = decoded[n];
1050
64633
    if (IsForbiddenHostCodePoint(ch)) {
1051
50
      return;
1052
    }
1053
  }
1054
1055
  // Check to see if it's an IPv4 IP address
1056
  bool is_ipv4;
1057
1936
  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
1058
1936
  if (is_ipv4)
1059
73
    return;
1060
1061
  // If the unicode flag is set, run the result through punycode ToUnicode
1062

1863
  if (unicode && !ToUnicode(decoded, &decoded))
1063
    return;
1064
1065
  // It's not an IPv4 or IPv6 address, it must be a domain
1066
1863
  SetDomain(std::move(decoded));
1067
}
1068
1069
// Locates the longest sequence of 0 segments in an IPv6 address
// in order to use the :: compression when serializing.
// Returns a pointer to the first element of the longest run of zeroes in
// values[0..len), or nullptr when no run is at least two elements long.
// When several runs share the maximum length, the earliest one wins.
template <typename T>
T* FindLongestZeroSequence(T* values, size_t len) {
  T* best = nullptr;
  unsigned best_length = 1;  // A run must beat this, i.e. span >= 2 zeroes.

  T* run_begin = nullptr;
  unsigned run_length = 0;

  // The extra iteration at i == len closes a run that reaches the end.
  for (size_t i = 0; i <= len; i++) {
    if (i < len && values[i] == 0) {
      if (run_begin == nullptr)
        run_begin = values + i;
      run_length++;
      continue;
    }
    if (run_length > best_length) {
      best_length = run_length;
      best = run_begin;
    }
    run_length = 0;
    run_begin = nullptr;
  }
  return best;
}
1099
1100
2066
std::string URLHost::ToStringMove() {
1101
2066
  std::string return_value;
1102
2066
  switch (type_) {
1103
    case HostType::H_DOMAIN:
1104
    case HostType::H_OPAQUE:
1105
1990
      return_value = std::move(value_.domain_or_opaque);
1106
1990
      break;
1107
    default:
1108
76
      return_value = ToString();
1109
76
      break;
1110
  }
1111
2066
  Reset();
1112
2066
  return return_value;
1113
}
1114
1115
76
std::string URLHost::ToString() const {
1116
152
  std::string dest;
1117

76
  switch (type_) {
1118
    case HostType::H_DOMAIN:
1119
    case HostType::H_OPAQUE:
1120
      return value_.domain_or_opaque;
1121
      break;
1122
    case HostType::H_IPV4: {
1123
52
      dest.reserve(15);
1124
52
      uint32_t value = value_.ipv4;
1125
260
      for (int n = 0; n < 4; n++) {
1126
        char buf[4];
1127
208
        snprintf(buf, sizeof(buf), "%d", value % 256);
1128
208
        dest.insert(0, buf);
1129
208
        if (n < 3)
1130
156
          dest.insert(0, 1, '.');
1131
208
        value /= 256;
1132
      }
1133
52
      break;
1134
    }
1135
    case HostType::H_IPV6: {
1136
24
      dest.reserve(41);
1137
24
      dest += '[';
1138
24
      const uint16_t* start = &value_.ipv6[0];
1139
      const uint16_t* compress_pointer =
1140
24
          FindLongestZeroSequence(start, 8);
1141
24
      bool ignore0 = false;
1142
216
      for (int n = 0; n <= 7; n++) {
1143
192
        const uint16_t* piece = &value_.ipv6[n];
1144

192
        if (ignore0 && *piece == 0)
1145
246
          continue;
1146
80
        else if (ignore0)
1147
19
          ignore0 = false;
1148
80
        if (compress_pointer == piece) {
1149
22
          dest += n == 0 ? "::" : ":";
1150
22
          ignore0 = true;
1151
22
          continue;
1152
        }
1153
        char buf[5];
1154
58
        snprintf(buf, sizeof(buf), "%x", *piece);
1155
58
        dest += buf;
1156
58
        if (n < 7)
1157
37
          dest += ':';
1158
      }
1159
24
      dest += ']';
1160
24
      break;
1161
    }
1162
    case HostType::H_FAILED:
1163
      break;
1164
  }
1165
76
  return dest;
1166
}
1167
1168
1917
bool ParseHost(const std::string& input,
1169
               std::string* output,
1170
               bool is_special,
1171
               bool unicode = false) {
1172
1917
  if (input.length() == 0) {
1173
42
    output->clear();
1174
42
    return true;
1175
  }
1176
3750
  URLHost host;
1177
1875
  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
1178
1875
  if (host.ParsingFailed())
1179
215
    return false;
1180
1660
  *output = host.ToStringMove();
1181
1660
  return true;
1182
}
1183
1184
1046
std::vector<std::string> FromJSStringArray(Environment* env,
1185
                                           Local<Array> array) {
1186
1046
  std::vector<std::string> vec;
1187
1046
  if (array->Length() > 0)
1188
1038
    vec.reserve(array->Length());
1189
6190
  for (size_t n = 0; n < array->Length(); n++) {
1190
6147
    Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
1191
4098
    if (val->IsString()) {
1192
4098
      Utf8Value value(env->isolate(), val.As<String>());
1193
2049
      vec.emplace_back(*value, value.length());
1194
    }
1195
  }
1196
1046
  return vec;
1197
}
1198
1199
1046
// Builds a url_data from the properties of the JS object base_obj:
// flags and port are copied when they are Int32 values, scheme is always
// converted to a string, username/password/host/query/fragment are copied
// when they are strings (setting the matching URL_FLAGS_HAS_* bit), and
// path is copied when it is an array.
url_data HarvestBase(Environment* env, Local<Object> base_obj) {
  url_data base;
  Local<Context> context = env->context();
  Isolate* const isolate = env->isolate();

  Local<Value> flags_value =
      base_obj->Get(context, env->flags_string()).ToLocalChecked();
  if (flags_value->IsInt32())
    base.flags = flags_value->Int32Value(context).FromJust();

  Local<Value> port_value =
      base_obj->Get(context, env->port_string()).ToLocalChecked();
  if (port_value->IsInt32())
    base.port = port_value->Int32Value(context).FromJust();

  Local<Value> scheme_value =
      base_obj->Get(context, env->scheme_string()).ToLocalChecked();
  base.scheme = Utf8Value(isolate, scheme_value).out();

  // Copies the string property `name` into base.*member. The flag is set
  // when the string is non-empty, or unconditionally for a string property
  // when empty_as_present is true. Non-string properties are ignored.
  auto CopyStringField = [&](std::string url_data::*member,
                             int flag,
                             Local<String> name,
                             bool empty_as_present) {
    Local<Value> property = base_obj->Get(context, name).ToLocalChecked();
    if (!property->IsString())
      return;
    Local<String> text = property.As<String>();
    Utf8Value utf8(isolate, text);
    (base.*member).assign(*utf8, utf8.length());
    if (empty_as_present || text->Length() != 0)
      base.flags |= flag;
  };
  CopyStringField(&url_data::username,
                  URL_FLAGS_HAS_USERNAME,
                  env->username_string(),
                  false);
  CopyStringField(&url_data::password,
                  URL_FLAGS_HAS_PASSWORD,
                  env->password_string(),
                  false);
  CopyStringField(&url_data::host,
                  URL_FLAGS_HAS_HOST,
                  env->host_string(),
                  true);
  CopyStringField(&url_data::query,
                  URL_FLAGS_HAS_QUERY,
                  env->query_string(),
                  true);
  CopyStringField(&url_data::fragment,
                  URL_FLAGS_HAS_FRAGMENT,
                  env->fragment_string(),
                  true);

  Local<Value> path_value =
      base_obj->Get(context, env->path_string()).ToLocalChecked();
  if (path_value->IsArray()) {
    base.flags |= URL_FLAGS_HAS_PATH;
    base.path = FromJSStringArray(env, path_value.As<Array>());
  }
  return base;
}
1253
1254
54210
// Builds a url_data from the JS object context_obj. Only the flag bits in
// kCopyFlagsMask are taken over; scheme, port and host are copied when they
// have the expected JS type, and username/password are read only when the
// corresponding URL_FLAGS_HAS_* bit was set (they must then be strings).
url_data HarvestContext(Environment* env, Local<Object> context_obj) {
  url_data context;
  Isolate* const isolate = env->isolate();

  // Reads one named property, aborting via ToLocalChecked() if the lookup
  // produced no value.
  auto read = [&](Local<String> key) -> Local<Value> {
    return context_obj->Get(env->context(), key).ToLocalChecked();
  };
  // Stores the UTF-8 form of a JS value in *target.
  auto store_utf8 = [&](Local<Value> source, std::string* target) {
    Utf8Value utf8(isolate, source);
    target->assign(*utf8, utf8.length());
  };

  Local<Value> flags_value = read(env->flags_string());
  if (flags_value->IsInt32()) {
    static constexpr int32_t kCopyFlagsMask =
        URL_FLAGS_SPECIAL |
        URL_FLAGS_CANNOT_BE_BASE |
        URL_FLAGS_HAS_USERNAME |
        URL_FLAGS_HAS_PASSWORD |
        URL_FLAGS_HAS_HOST;
    context.flags |= flags_value.As<Int32>()->Value() & kCopyFlagsMask;
  }

  Local<Value> scheme_value = read(env->scheme_string());
  if (scheme_value->IsString())
    store_utf8(scheme_value, &context.scheme);

  Local<Value> port_value = read(env->port_string());
  if (port_value->IsInt32())
    context.port = port_value.As<Int32>()->Value();

  if (context.flags & URL_FLAGS_HAS_USERNAME) {
    Local<Value> username_value = read(env->username_string());
    CHECK(username_value->IsString());
    store_utf8(username_value, &context.username);
  }

  if (context.flags & URL_FLAGS_HAS_PASSWORD) {
    Local<Value> password_value = read(env->password_string());
    CHECK(password_value->IsString());
    store_utf8(password_value, &context.password);
  }

  Local<Value> host_value = read(env->host_string());
  if (host_value->IsString())
    store_utf8(host_value, &context.host);

  return context;
}
1302
1303
// Single dot segment can be ".", "%2e", or "%2E"
1304
3020409
bool IsSingleDotSegment(const std::string& str) {
1305
3020409
  switch (str.size()) {
1306
    case 1:
1307
1265
      return str == ".";
1308
    case 3:
1309
146584
      return str[0] == '%' &&
1310

146580
             str[1] == '2' &&
1311
146580
             ASCIILowercase(str[2]) == 'e';
1312
    default:
1313
2872590
      return false;
1314
  }
1315
}
1316
1317
// Double dot segment can be:
1318
//   "..", ".%2e", ".%2E", "%2e.", "%2E.",
1319
//   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
1320
1511232
bool IsDoubleDotSegment(const std::string& str) {
1321

1511232
  switch (str.size()) {
1322
    case 2:
1323
1601
      return str == "..";
1324
    case 4:
1325

365277
      if (str[0] != '.' && str[0] != '%')
1326
365263
        return false;
1327
25
      return ((str[0] == '.' &&
1328
13
               str[1] == '%' &&
1329
4
               str[2] == '2' &&
1330

33
               ASCIILowercase(str[3]) == 'e') ||
1331
15
              (str[0] == '%' &&
1332
6
               str[1] == '2' &&
1333
6
               ASCIILowercase(str[2]) == 'e' &&
1334
17
               str[3] == '.'));
1335
    case 6:
1336
59790
      return (str[0] == '%' &&
1337
8
              str[1] == '2' &&
1338
6
              ASCIILowercase(str[2]) == 'e' &&
1339
4
              str[3] == '%' &&
1340

59790
              str[4] == '2' &&
1341
59788
              ASCIILowercase(str[5]) == 'e');
1342
    default:
1343
1084568
      return false;
1344
  }
1345
}
1346
1347
2285
void ShortenUrlPath(struct url_data* url) {
1348
2285
  if (url->path.empty()) return;
1349


2322
  if (url->path.size() == 1 && url->scheme == "file:" &&
1350
149
      IsNormalizedWindowsDriveLetter(url->path[0])) return;
1351
2173
  url->path.pop_back();
1352
}
1353
1354
}  // anonymous namespace
1355
1356
210603
void URL::Parse(const char* input,
1357
                size_t len,
1358
                enum url_parse_state state_override,
1359
                struct url_data* url,
1360
                bool has_url,
1361
                const struct url_data* base,
1362
                bool has_base) {
1363
210603
  const char* p = input;
1364
210603
  const char* end = input + len;
1365
1366
210603
  if (!has_url) {
1367
131803
    for (const char* ptr = p; ptr < end; ptr++) {
1368
131357
      if (IsC0ControlOrSpace(*ptr))
1369
28
        p++;
1370
      else
1371
131329
        break;
1372
    }
1373
131800
    for (const char* ptr = end - 1; ptr >= p; ptr--) {
1374
131354
      if (IsC0ControlOrSpace(*ptr))
1375
25
        end--;
1376
      else
1377
131329
        break;
1378
    }
1379
131775
    input = p;
1380
131775
    len = end - p;
1381
  }
1382
1383
  // The spec says we should strip out any ASCII tabs or newlines.
1384
  // In those cases, we create another std::string instance with the filtered
1385
  // contents, but in the general case we avoid the overhead.
1386
419309
  std::string whitespace_stripped;
1387
16410170
  for (const char* ptr = p; ptr < end; ptr++) {
1388
16199589
    if (!IsASCIITabOrNewline(*ptr))
1389
16199567
      continue;
1390
    // Hit tab or newline. Allocate storage, copy what we have until now,
1391
    // and then iterate and filter all similar characters out.
1392
22
    whitespace_stripped.reserve(len - 1);
1393
22
    whitespace_stripped.assign(p, ptr - p);
1394
    // 'ptr + 1' skips the current char, which we know to be tab or newline.
1395
318
    for (ptr = ptr + 1; ptr < end; ptr++) {
1396
296
      if (!IsASCIITabOrNewline(*ptr))
1397
254
        whitespace_stripped += *ptr;
1398
    }
1399
1400
    // Update variables like they should have looked like if the string
1401
    // had been stripped of whitespace to begin with.
1402
22
    input = whitespace_stripped.c_str();
1403
22
    len = whitespace_stripped.size();
1404
22
    p = input;
1405
22
    end = input + len;
1406
22
    break;
1407
  }
1408
1409
210603
  bool atflag = false;  // Set when @ has been seen.
1410
210603
  bool square_bracket_flag = false;  // Set inside of [...]
1411
210603
  bool password_token_seen_flag = false;  // Set after a : after an username.
1412
1413
419309
  std::string buffer;
1414
1415
  // Set the initial parse state.
1416
210609
  const bool has_state_override = state_override != kUnknownState;
1417
210609
  enum url_parse_state state = has_state_override ? state_override :
1418
210609
                                                    kSchemeStart;
1419
1420

210609
  if (state < kSchemeStart || state > kFragment) {
1421
    url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1422
    return;
1423
  }
1424
1425

33532505
  while (p <= end) {
1426
16662851
    const char ch = p < end ? p[0] : kEOL;
1427
16662851
    bool special = (url->flags & URL_FLAGS_SPECIAL);
1428
    bool cannot_be_base;
1429

16662851
    const bool special_back_slash = (special && ch == '\\');
1430
1431





16662851
    switch (state) {
1432
      case kSchemeStart:
1433
131801
        if (IsASCIIAlpha(ch)) {
1434
129304
          buffer += ASCIILowercase(ch);
1435
129300
          state = kScheme;
1436
2497
        } else if (!has_state_override) {
1437
2494
          state = kNoScheme;
1438
2494
          continue;
1439
        } else {
1440
3
          url->flags |= URL_FLAGS_FAILED;
1441
3
          return;
1442
        }
1443
129300
        break;
1444
      case kScheme:
1445


519912
        if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1446
390608
          buffer += ASCIILowercase(ch);
1447

129304
        } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1448

128858
          if (has_state_override && buffer.size() == 0) {
1449
            url->flags |= URL_FLAGS_TERMINATED;
1450
            return;
1451
          }
1452
128858
          buffer += ':';
1453
1454
128858
          bool new_is_special = IsSpecial(buffer);
1455
1456
128858
          if (has_state_override) {
1457

45
            if ((special != new_is_special) ||
1458
14
                ((buffer == "file:") &&
1459
4
                 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1460
2
                  (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1461
1
                  (url->port != -1)))) {
1462
13
              url->flags |= URL_FLAGS_TERMINATED;
1463
13
              return;
1464
            }
1465
1466
            // File scheme && (host == empty or null) check left to JS-land
1467
            // as it can be done before even entering C++ binding.
1468
          }
1469
1470
128845
          url->scheme = std::move(buffer);
1471
128845
          url->port = NormalizePort(url->scheme, url->port);
1472
128845
          if (new_is_special) {
1473
127976
            url->flags |= URL_FLAGS_SPECIAL;
1474
127976
            special = true;
1475
          } else {
1476
869
            url->flags &= ~URL_FLAGS_SPECIAL;
1477
869
            special = false;
1478
          }
1479
128845
          buffer.clear();
1480
128845
          if (has_state_override)
1481
8
            return;
1482
128837
          if (url->scheme == "file:") {
1483
126364
            state = kFile;
1484

4082
          } else if (special &&
1485

2988
                     has_base &&
1486
515
                     url->scheme == base->scheme) {
1487
178
            state = kSpecialRelativeOrAuthority;
1488
2295
          } else if (special) {
1489
1431
            state = kSpecialAuthoritySlashes;
1490
864
          } else if (p[1] == '/') {
1491
235
            state = kPathOrAuthority;
1492
235
            p++;
1493
          } else {
1494
629
            url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1495
629
            url->flags |= URL_FLAGS_HAS_PATH;
1496
629
            url->path.emplace_back("");
1497
629
            state = kCannotBeBase;
1498
128837
          }
1499
446
        } else if (!has_state_override) {
1500
444
          buffer.clear();
1501
444
          state = kNoScheme;
1502
444
          p = input;
1503
444
          continue;
1504
        } else {
1505
2
          url->flags |= URL_FLAGS_FAILED;
1506
2
          return;
1507
        }
1508
519445
        break;
1509
      case kNoScheme:
1510

2938
        cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1511

2938
        if (!has_base || (cannot_be_base && ch != '#')) {
1512
1452
          url->flags |= URL_FLAGS_FAILED;
1513
1452
          return;
1514

1486
        } else if (cannot_be_base && ch == '#') {
1515
14
          url->scheme = base->scheme;
1516
14
          if (IsSpecial(url->scheme)) {
1517
            url->flags |= URL_FLAGS_SPECIAL;
1518
            special = true;
1519
          } else {
1520
14
            url->flags &= ~URL_FLAGS_SPECIAL;
1521
14
            special = false;
1522
          }
1523
14
          if (base->flags & URL_FLAGS_HAS_PATH) {
1524
14
            url->flags |= URL_FLAGS_HAS_PATH;
1525
14
            url->path = base->path;
1526
          }
1527
14
          if (base->flags & URL_FLAGS_HAS_QUERY) {
1528
2
            url->flags |= URL_FLAGS_HAS_QUERY;
1529
2
            url->query = base->query;
1530
          }
1531
14
          if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1532
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
1533
            url->fragment = base->fragment;
1534
          }
1535
14
          url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1536
14
          state = kFragment;
1537

2944
        } else if (has_base &&
1538
1472
                   base->scheme != "file:") {
1539
147
          state = kRelative;
1540
147
          continue;
1541
        } else {
1542
1325
          url->scheme = "file:";
1543
1325
          url->flags |= URL_FLAGS_SPECIAL;
1544
1325
          special = true;
1545
1325
          state = kFile;
1546
1325
          continue;
1547
        }
1548
14
        break;
1549
      case kSpecialRelativeOrAuthority:
1550

178
        if (ch == '/' && p[1] == '/') {
1551
162
          state = kSpecialAuthorityIgnoreSlashes;
1552
162
          p++;
1553
        } else {
1554
16
          state = kRelative;
1555
16
          continue;
1556
        }
1557
162
        break;
1558
      case kPathOrAuthority:
1559
235
        if (ch == '/') {
1560
181
          state = kAuthority;
1561
        } else {
1562
54
          state = kPath;
1563
54
          continue;
1564
        }
1565
181
        break;
1566
      case kRelative:
1567
163
        url->scheme = base->scheme;
1568
163
        if (IsSpecial(url->scheme)) {
1569
120
          url->flags |= URL_FLAGS_SPECIAL;
1570
120
          special = true;
1571
        } else {
1572
43
          url->flags &= ~URL_FLAGS_SPECIAL;
1573
43
          special = false;
1574
        }
1575

163
        switch (ch) {
1576
          case kEOL:
1577
8
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1578
2
              url->flags |= URL_FLAGS_HAS_USERNAME;
1579
2
              url->username = base->username;
1580
            }
1581
8
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1582
2
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1583
2
              url->password = base->password;
1584
            }
1585
8
            if (base->flags & URL_FLAGS_HAS_HOST) {
1586
8
              url->flags |= URL_FLAGS_HAS_HOST;
1587
8
              url->host = base->host;
1588
            }
1589
8
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1590
              url->flags |= URL_FLAGS_HAS_QUERY;
1591
              url->query = base->query;
1592
            }
1593
8
            if (base->flags & URL_FLAGS_HAS_PATH) {
1594
8
              url->flags |= URL_FLAGS_HAS_PATH;
1595
8
              url->path = base->path;
1596
            }
1597
8
            url->port = base->port;
1598
8
            break;
1599
          case '/':
1600
35
            state = kRelativeSlash;
1601
35
            break;
1602
          case '?':
1603
24
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1604
              url->flags |= URL_FLAGS_HAS_USERNAME;
1605
              url->username = base->username;
1606
            }
1607
24
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1608
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1609
              url->password = base->password;
1610
            }
1611
24
            if (base->flags & URL_FLAGS_HAS_HOST) {
1612
22
              url->flags |= URL_FLAGS_HAS_HOST;
1613
22
              url->host = base->host;
1614
            }
1615
24
            if (base->flags & URL_FLAGS_HAS_PATH) {
1616
24
              url->flags |= URL_FLAGS_HAS_PATH;
1617
24
              url->path = base->path;
1618
            }
1619
24
            url->port = base->port;
1620
24
            state = kQuery;
1621
24
            break;
1622
          case '#':
1623
18
            if (base->flags & URL_FLAGS_HAS_USERNAME) {
1624
              url->flags |= URL_FLAGS_HAS_USERNAME;
1625
              url->username = base->username;
1626
            }
1627
18
            if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1628
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1629
              url->password = base->password;
1630
            }
1631
18
            if (base->flags & URL_FLAGS_HAS_HOST) {
1632
16
              url->flags |= URL_FLAGS_HAS_HOST;
1633
16
              url->host = base->host;
1634
            }
1635
18
            if (base->flags & URL_FLAGS_HAS_QUERY) {
1636
              url->flags |= URL_FLAGS_HAS_QUERY;
1637
              url->query = base->query;
1638
            }
1639
18
            if (base->flags & URL_FLAGS_HAS_PATH) {
1640
18
              url->flags |= URL_FLAGS_HAS_PATH;
1641
18
              url->path = base->path;
1642
            }
1643
18
            url->port = base->port;
1644
18
            state = kFragment;
1645
18
            break;
1646
          default:
1647
78
            if (special_back_slash) {
1648
4
              state = kRelativeSlash;
1649
            } else {
1650
74
              if (base->flags & URL_FLAGS_HAS_USERNAME) {
1651
1
                url->flags |= URL_FLAGS_HAS_USERNAME;
1652
1
                url->username = base->username;
1653
              }
1654
74
              if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1655
1
                url->flags |= URL_FLAGS_HAS_PASSWORD;
1656
1
                url->password = base->password;
1657
              }
1658
74
              if (base->flags & URL_FLAGS_HAS_HOST) {
1659
68
                url->flags |= URL_FLAGS_HAS_HOST;
1660
68
                url->host = base->host;
1661
              }
1662
74
              if (base->flags & URL_FLAGS_HAS_PATH) {
1663
74
                url->flags |= URL_FLAGS_HAS_PATH;
1664
74
                url->path = base->path;
1665
74
                ShortenUrlPath(url);
1666
              }
1667
74
              url->port = base->port;
1668
74
              state = kPath;
1669
74
              continue;
1670
            }
1671
        }
1672
89
        break;
1673
      case kRelativeSlash:
1674


39
        if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1675
8
          state = kSpecialAuthorityIgnoreSlashes;
1676
31
        } else if (ch == '/') {
1677
3
          state = kAuthority;
1678
        } else {
1679
28
          if (base->flags & URL_FLAGS_HAS_USERNAME) {
1680
4
            url->flags |= URL_FLAGS_HAS_USERNAME;
1681
4
            url->username = base->username;
1682
          }
1683
28
          if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1684
2
            url->flags |= URL_FLAGS_HAS_PASSWORD;
1685
2
            url->password = base->password;
1686
          }
1687
28
          if (base->flags & URL_FLAGS_HAS_HOST) {
1688
26
            url->flags |= URL_FLAGS_HAS_HOST;
1689
26
            url->host = base->host;
1690
          }
1691
28
          url->port = base->port;
1692
28
          state = kPath;
1693
28
          continue;
1694
        }
1695
11
        break;
1696
      case kSpecialAuthoritySlashes:
1697
1431
        state = kSpecialAuthorityIgnoreSlashes;
1698

1431
        if (ch == '/' && p[1] == '/') {
1699
1339
          p++;
1700
        } else {
1701
92
          continue;
1702
        }
1703
1339
        break;
1704
      case kSpecialAuthorityIgnoreSlashes:
1705

1647
        if (ch != '/' && ch != '\\') {
1706
1601
          state = kAuthority;
1707
1601
          continue;
1708
        }
1709
46
        break;
1710
      case kAuthority:
1711
55507
        if (ch == '@') {
1712
151
          if (atflag) {
1713
13
            buffer.reserve(buffer.size() + 3);
1714
13
            buffer.insert(0, "%40");
1715
          }
1716
151
          atflag = true;
1717
151
          size_t blen = buffer.size();
1718

151
          if (blen > 0 && buffer[0] != ':') {
1719
93
            url->flags |= URL_FLAGS_HAS_USERNAME;
1720
          }
1721
717
          for (size_t n = 0; n < blen; n++) {
1722
566
            const char bch = buffer[n];
1723
566
            if (bch == ':') {
1724
87
              url->flags |= URL_FLAGS_HAS_PASSWORD;
1725
87
              if (!password_token_seen_flag) {
1726
85
                password_token_seen_flag = true;
1727
85
                continue;
1728
              }
1729
            }
1730
481
            if (password_token_seen_flag) {
1731
189
              AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1732
            } else {
1733
292
              AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1734
            }
1735
          }
1736
151
          buffer.clear();
1737

55356
        } else if (ch == kEOL ||
1738
53606
                   ch == '/' ||
1739
53589
                   ch == '?' ||
1740
53579
                   ch == '#' ||
1741
                   special_back_slash) {
1742

1785
          if (atflag && buffer.size() == 0) {
1743
39
            url->flags |= URL_FLAGS_FAILED;
1744
39
            return;
1745
          }
1746
1746
          p -= buffer.size() + 1;
1747
1746
          buffer.clear();
1748
1746
          state = kHost;
1749
        } else {
1750
53571
          buffer += ch;
1751
        }
1752
55468
        break;
1753
      case kHost:
1754
      case kHostname:
1755

54922
        if (has_state_override && url->scheme == "file:") {
1756
6
          state = kFileHost;
1757
6
          continue;
1758

54916
        } else if (ch == ':' && !square_bracket_flag) {
1759
538
          if (buffer.size() == 0) {
1760
19
            url->flags |= URL_FLAGS_FAILED;
1761
19
            return;
1762
          }
1763
519
          url->flags |= URL_FLAGS_HAS_HOST;
1764
519
          if (!ParseHost(buffer, &url->host, special)) {
1765
3
            url->flags |= URL_FLAGS_FAILED;
1766
3
            return;
1767
          }
1768
516
          buffer.clear();
1769
516
          state = kPort;
1770
1030
          if (state_override == kHostname) {
1771
2
            return;
1772
          }
1773

54378
        } else if (ch == kEOL ||
1774
53074
                   ch == '/' ||
1775
53053
                   ch == '?' ||
1776
53039
                   ch == '#' ||
1777
                   special_back_slash) {
1778
1349
          p--;
1779

1349
          if (special && buffer.size() == 0) {
1780
11
            url->flags |= URL_FLAGS_FAILED;
1781
11
            return;
1782
          }
1783

1457
          if (has_state_override &&
1784

1360
              buffer.size() == 0 &&
1785

50
              ((url->username.size() > 0 || url->password.size() > 0) ||
1786
16
               url->port != -1)) {
1787
4
            url->flags |= URL_FLAGS_TERMINATED;
1788
4
            return;
1789
          }
1790
1334
          url->flags |= URL_FLAGS_HAS_HOST;
1791
1334
          if (!ParseHost(buffer, &url->host, special)) {
1792
198
            url->flags |= URL_FLAGS_FAILED;
1793
198
            return;
1794
          }
1795
1136
          buffer.clear();
1796
1136
          state = kPathStart;
1797
2193
          if (has_state_override) {
1798
79
            return;
1799
          }
1800
        } else {
1801
53029
          if (ch == '[')
1802
93
            square_bracket_flag = true;
1803
53029
          if (ch == ']')
1804
90
            square_bracket_flag = false;
1805
53029
          buffer += ch;
1806
        }
1807
54600
        break;
1808
      case kPort:
1809
2947
        if (IsASCIIDigit(ch)) {
1810
2416
          buffer += ch;
1811

531
        } else if (has_state_override ||
1812
185
                   ch == kEOL ||
1813
27
                   ch == '/' ||
1814
27
                   ch == '?' ||
1815
27
                   ch == '#' ||
1816
                   special_back_slash) {
1817
504
          if (buffer.size() > 0) {
1818
499
            unsigned port = 0;
1819
            // the condition port <= 0xffff prevents integer overflow
1820

2753
            for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1821
2254
              port = port * 10 + buffer[i] - '0';
1822
499
            if (port > 0xffff) {
1823
              // TODO(TimothyGu): This hack is currently needed for the host
1824
              // setter since it needs access to hostname if it is valid, and
1825
              // if the FAILED flag is set the entire response to JS layer
1826
              // will be empty.
1827
18
              if (state_override == kHost)
1828
1
                url->port = -1;
1829
              else
1830
17
                url->flags |= URL_FLAGS_FAILED;
1831
18
              return;
1832
            }
1833
            // the port is valid
1834
481
            url->port = NormalizePort(url->scheme, static_cast<int>(port));
1835
481
            if (url->port == -1)
1836
29
              url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1837
481
            buffer.clear();
1838
5
          } else if (has_state_override) {
1839
            // TODO(TimothyGu): Similar case as above.
1840
1
            if (state_override == kHost)
1841
1
              url->port = -1;
1842
            else
1843
              url->flags |= URL_FLAGS_TERMINATED;
1844
1
            return;
1845
          }
1846
485
          state = kPathStart;
1847
485
          continue;
1848
        } else {
1849
27
          url->flags |= URL_FLAGS_FAILED;
1850
27
          return;
1851
        }
1852
2416
        break;
1853
      case kFile:
1854
127689
        url->scheme = "file:";
1855

127689
        if (ch == '/' || ch == '\\') {
1856
126461
          state = kFileSlash;
1857

1228
        } else if (has_base && base->scheme == "file:") {
1858

1221
          switch (ch) {
1859
            case kEOL:
1860
13
              if (base->flags & URL_FLAGS_HAS_HOST) {
1861
13
                url->flags |= URL_FLAGS_HAS_HOST;
1862
13
                url->host = base->host;
1863
              }
1864
13
              if (base->flags & URL_FLAGS_HAS_PATH) {
1865
13
                url->flags |= URL_FLAGS_HAS_PATH;
1866
13
                url->path = base->path;
1867
              }
1868
13
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1869
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1870
2
                url->query = base->query;
1871
              }
1872
13
              break;
1873
            case '?':
1874
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1875
2
                url->flags |= URL_FLAGS_HAS_HOST;
1876
2
                url->host = base->host;
1877
              }
1878
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1879
2
                url->flags |= URL_FLAGS_HAS_PATH;
1880
2
                url->path = base->path;
1881
              }
1882
2
              url->flags |= URL_FLAGS_HAS_QUERY;
1883
2
              url->query.clear();
1884
2
              state = kQuery;
1885
2
              break;
1886
            case '#':
1887
2
              if (base->flags & URL_FLAGS_HAS_HOST) {
1888
2
                url->flags |= URL_FLAGS_HAS_HOST;
1889
2
                url->host = base->host;
1890
              }
1891
2
              if (base->flags & URL_FLAGS_HAS_PATH) {
1892
2
                url->flags |= URL_FLAGS_HAS_PATH;
1893
2
                url->path = base->path;
1894
              }
1895
2
              if (base->flags & URL_FLAGS_HAS_QUERY) {
1896
2
                url->flags |= URL_FLAGS_HAS_QUERY;
1897
2
                url->query = base->query;
1898
              }
1899
2
              url->flags |= URL_FLAGS_HAS_FRAGMENT;
1900
2
              url->fragment.clear();
1901
2
              state = kFragment;
1902
2
              break;
1903
            default:
1904
1204
              if (!StartsWithWindowsDriveLetter(p, end)) {
1905
1195
                if (base->flags & URL_FLAGS_HAS_HOST) {
1906
1195
                  url->flags |= URL_FLAGS_HAS_HOST;
1907
1195
                  url->host = base->host;
1908
                }
1909
1195
                if (base->flags & URL_FLAGS_HAS_PATH) {
1910
1195
                  url->flags |= URL_FLAGS_HAS_PATH;
1911
1195
                  url->path = base->path;
1912
                }
1913
1195
                ShortenUrlPath(url);
1914
              }
1915
1204
              state = kPath;
1916
1204
              continue;
1917
          }
1918
        } else {
1919
7
          state = kPath;
1920
7
          continue;
1921
        }
1922
126478
        break;
1923
      case kFileSlash:
1924

126461
        if (ch == '/' || ch == '\\') {
1925
126360
          state = kFileHost;
1926
        } else {
1927

202
          if (has_base &&
1928

199
              base->scheme == "file:" &&
1929
98
              !StartsWithWindowsDriveLetter(p, end)) {
1930
93
            if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1931
1
              url->flags |= URL_FLAGS_HAS_PATH;
1932
1
              url->path.push_back(base->path[0]);
1933
            } else {
1934
92
              if (base->flags & URL_FLAGS_HAS_HOST) {
1935
92
                url->flags |= URL_FLAGS_HAS_HOST;
1936
92
                url->host = base->host;
1937
              } else {
1938
                url->flags &= ~URL_FLAGS_HAS_HOST;
1939
                url->host.clear();
1940
              }
1941
            }
1942
          }
1943
101
          state = kPath;
1944
101
          continue;
1945
        }
1946
126360
        break;
1947
      case kFileHost:
1948

126761
        if (ch == kEOL ||
1949
400
            ch == '/' ||
1950
395
            ch == '\\' ||
1951
395
            ch == '?' ||
1952
            ch == '#') {
1953

379092
          if (!has_state_override &&
1954

126373
              buffer.size() == 2 &&
1955
7
              IsWindowsDriveLetter(buffer)) {
1956
4
            state = kPath;
1957
126362
          } else if (buffer.size() == 0) {
1958
126298
            url->flags |= URL_FLAGS_HAS_HOST;
1959
126298
            url->host.clear();
1960
126298
            if (has_state_override)
1961
2
              return;
1962
126296
            state = kPathStart;
1963
          } else {
1964
112
            std::string host;
1965
64
            if (!ParseHost(buffer, &host, special)) {
1966
14
              url->flags |= URL_FLAGS_FAILED;
1967
14
              return;
1968
            }
1969
50
            if (host == "localhost")
1970
11
              host.clear();
1971
50
            url->flags |= URL_FLAGS_HAS_HOST;
1972
50
            url->host = host;
1973
50
            if (has_state_override)
1974
2
              return;
1975
48
            buffer.clear();
1976
48
            state = kPathStart;
1977
          }
1978
126348
          continue;
1979
        } else {
1980
395
          buffer += ch;
1981
        }
1982
395
        break;
1983
      case kPathStart:
1984
206390
        if (IsSpecial(url->scheme)) {
1985
206230
          state = kPath;
1986

206230
          if (ch != '/' && ch != '\\') {
1987
79002
            continue;
1988
          }
1989

160
        } else if (!has_state_override && ch == '?') {
1990
3
          url->flags |= URL_FLAGS_HAS_QUERY;
1991
3
          url->query.clear();
1992
3
          state = kQuery;
1993

157
        } else if (!has_state_override && ch == '#') {
1994
3
          url->flags |= URL_FLAGS_HAS_FRAGMENT;
1995
3
          url->fragment.clear();
1996
3
          state = kFragment;
1997
154
        } else if (ch != kEOL) {
1998
121
          state = kPath;
1999
121
          if (ch != '/') {
2000
6
            continue;
2001
          }
2002
        }
2003
127382
        break;
2004
      case kPath:
2005

15284320
        if (ch == kEOL ||
2006
13773240
            ch == '/' ||
2007
13773207
            special_back_slash ||
2008

18997017
            (!has_state_override && (ch == '?' || ch == '#'))) {
2009
1511232
          if (IsDoubleDotSegment(buffer)) {
2010
1016
            ShortenUrlPath(url);
2011

1016
            if (ch != '/' && !special_back_slash) {
2012
18
              url->flags |= URL_FLAGS_HAS_PATH;
2013
18
              url->path.emplace_back("");
2014
            }
2015

3020762
          } else if (IsSingleDotSegment(buffer) &&
2016

1510240
                     ch != '/' && !special_back_slash) {
2017
23
            url->flags |= URL_FLAGS_HAS_PATH;
2018
23
            url->path.emplace_back("");
2019
1510193
          } else if (!IsSingleDotSegment(buffer)) {
2020

4527549
            if (url->scheme == "file:" &&
2021
1688251
                url->path.empty() &&
2022

1690395
                buffer.size() == 2 &&
2023
35
                IsWindowsDriveLetter(buffer)) {
2024

49
              if ((url->flags & URL_FLAGS_HAS_HOST) &&
2025
15
                  !url->host.empty()) {
2026
3
                url->host.clear();
2027
3
                url->flags |= URL_FLAGS_HAS_HOST;
2028
              }
2029
34
              buffer[1] = ':';
2030
            }
2031
1509885
            url->flags |= URL_FLAGS_HAS_PATH;
2032
1509885
            url->path.emplace_back(std::move(buffer));
2033
          }
2034
1511232
          buffer.clear();
2035

1717379
          if (url->scheme == "file:" &&
2036
1302900
              (ch == kEOL ||
2037
1302888
               ch == '?' ||
2038
               ch == '#')) {
2039

255506
            while (url->path.size() > 1 && url->path[0].length() == 0) {
2040
24679
              url->path.erase(url->path.begin());
2041
            }
2042
          }
2043
3022462
          if (ch == '?') {
2044
108
            url->flags |= URL_FLAGS_HAS_QUERY;
2045
108
            state = kQuery;
2046
1511123
          } else if (ch == '#') {
2047
11
            state = kFragment;
2048
          }
2049
        } else {
2050
13773088
          AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
2051
        }
2052
15284318
        break;
2053
      case kCannotBeBase:
2054
15971
        switch (ch) {
2055
          case '?':
2056
2
            state = kQuery;
2057
2
            break;
2058
          case '#':
2059
5
            state = kFragment;
2060
5
            break;
2061
          default:
2062
15964
            if (url->path.size() == 0)
2063
              url->path.emplace_back("");
2064

15964
            if (url->path.size() > 0 && ch != kEOL)
2065
15342
              AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
2066
        }
2067
15971
        break;
2068
      case kQuery:
2069

2964
        if (ch == kEOL || (!has_state_override && ch == '#')) {
2070
254
          url->flags |= URL_FLAGS_HAS_QUERY;
2071
254
          url->query = std::move(buffer);
2072
254
          buffer.clear();
2073
508
          if (ch == '#')
2074
54
            state = kFragment;
2075
        } else {
2076
2710
          AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
2077
2710
                                                QUERY_ENCODE_SET_NONSPECIAL);
2078
        }
2079
2964
        break;
2080
      case kFragment:
2081
575
        switch (ch) {
2082
          case kEOL:
2083
126
            url->flags |= URL_FLAGS_HAS_FRAGMENT;
2084
126
            url->fragment = std::move(buffer);
2085
126
            break;
2086
          case 0:
2087
2
            break;
2088
          default:
2089
447
            AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
2090
        }
2091
575
        break;
2092
      default:
2093
        url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
2094
        return;
2095
    }
2096
2097
16447514
    p++;
2098
  }
2099
}  // NOLINT(readability/fn_size)
2100
2101
namespace {
2102
158118
void SetArgs(Environment* env,
2103
             Local<Value> argv[ARG_COUNT],
2104
             const struct url_data& url) {
2105
158118
  Isolate* isolate = env->isolate();
2106
316236
  argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2107
158118
  argv[ARG_PROTOCOL] =
2108
158118
      url.flags & URL_FLAGS_SPECIAL ?
2109
157196
          GetSpecial(env, url.scheme) :
2110
631550
          OneByteString(isolate, url.scheme.c_str());
2111
158118
  if (url.flags & URL_FLAGS_HAS_USERNAME)
2112
164
    argv[ARG_USERNAME] = Utf8String(isolate, url.username);
2113
158118
  if (url.flags & URL_FLAGS_HAS_PASSWORD)
2114
152
    argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
2115
158118
  if (url.flags & URL_FLAGS_HAS_HOST)
2116
314746
    argv[ARG_HOST] = Utf8String(isolate, url.host);
2117
158118
  if (url.flags & URL_FLAGS_HAS_QUERY)
2118
512
    argv[ARG_QUERY] = Utf8String(isolate, url.query);
2119
158118
  if (url.flags & URL_FLAGS_HAS_FRAGMENT)
2120
244
    argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
2121
158118
  if (url.port > -1)
2122
926
    argv[ARG_PORT] = Integer::New(isolate, url.port);
2123
158118
  if (url.flags & URL_FLAGS_HAS_PATH)
2124
315700
    argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked();
2125
158118
}
2126
2127
159148
void Parse(Environment* env,
2128
           Local<Value> recv,
2129
           const char* input,
2130
           size_t len,
2131
           enum url_parse_state state_override,
2132
           Local<Value> base_obj,
2133
           Local<Value> context_obj,
2134
           Local<Function> cb,
2135
           Local<Value> error_cb) {
2136
159148
  Isolate* isolate = env->isolate();
2137
159148
  Local<Context> context = env->context();
2138
318279
  HandleScope handle_scope(isolate);
2139
159131
  Context::Scope context_scope(context);
2140
2141
159148
  const bool has_context = context_obj->IsObject();
2142
159148
  const bool has_base = base_obj->IsObject();
2143
2144
318279
  url_data base;
2145
318279
  url_data url;
2146
159148
  if (has_context)
2147
54210
    url = HarvestContext(env, context_obj.As<Object>());
2148
159148
  if (has_base)
2149
1046
    base = HarvestBase(env, base_obj.As<Object>());
2150
2151
159148
  URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
2152

159148
  if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
2153
54210
      ((state_override != kUnknownState) &&
2154
54210
       (url.flags & URL_FLAGS_TERMINATED)))
2155
17
    return;
2156
2157
  // Define the return value placeholders
2158
  const Local<Value> undef = Undefined(isolate);
2159
  const Local<Value> null = Null(isolate);
2160
159131
  if (!(url.flags & URL_FLAGS_FAILED)) {
2161
    Local<Value> argv[] = {
2162
      undef,
2163
      undef,
2164
      undef,
2165
      undef,
2166
      null,  // host defaults to null
2167
      null,  // port defaults to null
2168
      undef,
2169
      null,  // query defaults to null
2170
      null,  // fragment defaults to null
2171
157832
    };
2172
157832
    SetArgs(env, argv, url);
2173
473496
    cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
2174
1299
  } else if (error_cb->IsFunction()) {
2175
1252
    Local<Value> argv[2] = { undef, undef };
2176
2504
    argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2177
1252
    argv[ERR_ARG_INPUT] =
2178
2504
      String::NewFromUtf8(env->isolate(),
2179
                          input,
2180
2504
                          NewStringType::kNormal).ToLocalChecked();
2181
5008
    error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
2182
1252
        .FromMaybe(Local<Value>());
2183
  }
2184
}
2185
2186
159148
void Parse(const FunctionCallbackInfo<Value>& args) {
2187
159148
  Environment* env = Environment::GetCurrent(args);
2188
159148
  CHECK_GE(args.Length(), 5);
2189
477444
  CHECK(args[0]->IsString());  // input
2190


645304
  CHECK(args[2]->IsUndefined() ||  // base context
2191
        args[2]->IsNull() ||
2192
        args[2]->IsObject());
2193


748494
  CHECK(args[3]->IsUndefined() ||  // context
2194
        args[3]->IsNull() ||
2195
        args[3]->IsObject());
2196
318296
  CHECK(args[4]->IsFunction());  // complete callback
2197

687320
  CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
2198
2199
318296
  Utf8Value input(env->isolate(), args[0]);
2200
159148
  enum url_parse_state state_override = kUnknownState;
2201
318296
  if (args[1]->IsNumber()) {
2202
159148
    state_override = static_cast<enum url_parse_state>(
2203
795740
        args[1]->Uint32Value(env->context()).FromJust());
2204
  }
2205
2206
318296
  Parse(env, args.This(),
2207
159148
        *input, input.length(),
2208
        state_override,
2209
        args[2],
2210
        args[3],
2211
318296
        args[4].As<Function>(),
2212
159148
        args[5]);
2213
159148
}
2214
2215
22
void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
2216
22
  Environment* env = Environment::GetCurrent(args);
2217
22
  CHECK_GE(args.Length(), 1);
2218
66
  CHECK(args[0]->IsString());
2219
44
  Utf8Value value(env->isolate(), args[0]);
2220
44
  std::string output;
2221
22
  size_t len = value.length();
2222
22
  output.reserve(len);
2223
233
  for (size_t n = 0; n < len; n++) {
2224
211
    const char ch = (*value)[n];
2225
211
    AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
2226
  }
2227
44
  args.GetReturnValue().Set(
2228
44
      String::NewFromUtf8(env->isolate(),
2229
                          output.c_str(),
2230
22
                          NewStringType::kNormal).ToLocalChecked());
2231
22
}
2232
2233
11
void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2234
11
  Environment* env = Environment::GetCurrent(args);
2235
11
  CHECK_GE(args.Length(), 2);
2236
33
  CHECK(args[0]->IsString());
2237
22
  CHECK(args[1]->IsNumber());
2238
2239
22
  TwoByteValue value(env->isolate(), args[0]);
2240
2241
44
  int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2242
11
  CHECK_GE(start, 0);
2243
2244
32
  for (size_t i = start; i < value.length(); i++) {
2245
21
    char16_t c = value[i];
2246
21
    if (!IsUnicodeSurrogate(c)) {
2247
8
      continue;
2248

13
    } else if (IsUnicodeSurrogateTrail(c) || i == value.length() - 1) {
2249
12
      value[i] = kUnicodeReplacementCharacter;
2250
    } else {
2251
1
      char16_t d = value[i + 1];
2252
1
      if (IsUnicodeTrail(d)) {
2253
        i++;
2254
      } else {
2255
1
        value[i] = kUnicodeReplacementCharacter;
2256
      }
2257
    }
2258
  }
2259
2260
22
  args.GetReturnValue().Set(
2261
22
      String::NewFromTwoByte(env->isolate(),
2262
11
                             *value,
2263
                             NewStringType::kNormal,
2264
22
                             value.length()).ToLocalChecked());
2265
11
}
2266
2267
222
void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2268
222
  Environment* env = Environment::GetCurrent(args);
2269
222
  CHECK_GE(args.Length(), 1);
2270
666
  CHECK(args[0]->IsString());
2271
435
  Utf8Value value(env->isolate(), args[0]);
2272
2273
435
  URLHost host;
2274
  // Assuming the host is used for a special scheme.
2275
222
  host.ParseHost(*value, value.length(), true);
2276
222
  if (host.ParsingFailed()) {
2277
27
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2278
9
    return;
2279
  }
2280
426
  std::string out = host.ToStringMove();
2281
426
  args.GetReturnValue().Set(
2282
426
      String::NewFromUtf8(env->isolate(),
2283
                          out.c_str(),
2284
213
                          NewStringType::kNormal).ToLocalChecked());
2285
}
2286
2287
202
void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2288
202
  Environment* env = Environment::GetCurrent(args);
2289
202
  CHECK_GE(args.Length(), 1);
2290
606
  CHECK(args[0]->IsString());
2291
395
  Utf8Value value(env->isolate(), args[0]);
2292
2293
395
  URLHost host;
2294
  // Assuming the host is used for a special scheme.
2295
202
  host.ParseHost(*value, value.length(), true, true);
2296
202
  if (host.ParsingFailed()) {
2297
27
    args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2298
9
    return;
2299
  }
2300
386
  std::string out = host.ToStringMove();
2301
386
  args.GetReturnValue().Set(
2302
386
      String::NewFromUtf8(env->isolate(),
2303
                          out.c_str(),
2304
193
                          NewStringType::kNormal).ToLocalChecked());
2305
}
2306
2307
4377
void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2308
4377
  Environment* env = Environment::GetCurrent(args);
2309
4377
  CHECK_EQ(args.Length(), 1);
2310
8754
  CHECK(args[0]->IsFunction());
2311
8754
  env->set_url_constructor_function(args[0].As<Function>());
2312
4377
}
2313
2314
4377
// Module initializer: installs the binding's methods and constants on
// `target`. `unused` and `priv` are not read by this function.
void Initialize(Local<Object> target,
2315
                Local<Value> unused,
2316
                Local<Context> context,
2317
                void* priv) {
2318
4377
  Environment* env = Environment::GetCurrent(context);
2319
4377
  // `parse` and `setURLConstructor` are registered with SetMethod(); the
  // remaining helpers are registered with SetMethodNoSideEffect().
  env->SetMethod(target, "parse", Parse);
2320
4377
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
2321
4377
  env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString);
2322
4377
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
2323
4377
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
2324
4377
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);
2325
2326
// Define one constant on `target` per entry of the FLAGS list.
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
2327
227604
  FLAGS(XX)
2328
#undef XX
2329
2330
// Define one constant on `target` per entry of the PARSESTATES list.
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
2331
367668
  PARSESTATES(XX)
2332
#undef XX
2333
4377
}
2334
}  // namespace
2335
2336
1384
std::string URL::ToFilePath() const {
2337
1384
  if (context_.scheme != "file:") {
2338
1
    return "";
2339
  }
2340
2341
#ifdef _WIN32
2342
  const char* slash = "\\";
2343
  auto is_slash = [] (char ch) {
2344
    return ch == '/' || ch == '\\';
2345
  };
2346
#else
2347
1383
  const char* slash = "/";
2348
109201
  auto is_slash = [] (char ch) {
2349
    return ch == '/';
2350
109201
  };
2351

2766
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2352
1383
      context_.host.length() > 0) {
2353
1
    return "";
2354
  }
2355
#endif
2356
2764
  std::string decoded_path;
2357
12210
  for (const std::string& part : context_.path) {
2358
21658
    std::string decoded = PercentDecode(part.c_str(), part.length());
2359
120029
    for (char& ch : decoded) {
2360
109201
      if (is_slash(ch)) {
2361
2
        return "";
2362
      }
2363
    }
2364
10828
    decoded_path += slash + decoded;
2365
  }
2366
2367
#ifdef _WIN32
2368
  // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2369
2370
  // If hostname is set, then we have a UNC path. Pass the hostname through
2371
  // ToUnicode just in case it is an IDN using punycode encoding. We do not
2372
  // need to worry about percent encoding because the URL parser will have
2373
  // already taken care of that for us. Note that this only causes IDNs with an
2374
  // appropriate `xn--` prefix to be decoded.
2375
  if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2376
      context_.host.length() > 0) {
2377
    std::string unicode_host;
2378
    if (!ToUnicode(context_.host, &unicode_host)) {
2379
      return "";
2380
    }
2381
    return "\\\\" + unicode_host + decoded_path;
2382
  }
2383
  // Otherwise, it's a local path that requires a drive letter.
2384
  if (decoded_path.length() < 3) {
2385
    return "";
2386
  }
2387
  if (decoded_path[2] != ':' ||
2388
      !IsASCIIAlpha(decoded_path[1])) {
2389
    return "";
2390
  }
2391
  // Strip out the leading '\'.
2392
  return decoded_path.substr(1);
2393
#else
2394
1380
  return decoded_path;
2395
#endif
2396
}
2397
2398
24618
// Builds a "file://" URL whose path is parsed from `file_path`.
//
// Every literal '%' in `file_path` is written as "%25" before parsing; all
// other characters are handed to the parser unchanged.
URL URL::FromFilePath(const std::string& file_path) {
  URL url("file://");

  std::string escaped;
  for (const char c : file_path) {
    if (c == '%') {
      escaped += "%25";
    } else {
      escaped += c;
    }
  }

  // Parse starting in the kPathStart state, with `url.context_` as the
  // existing context (has_context = true) and no base URL.
  URL::Parse(escaped.c_str(),
             escaped.length(),
             kPathStart,
             &url.context_,
             true,
             nullptr,
             false);
  return url;
}
2410
2411
// This function works by calling out to a JS function that creates and
2412
// returns the JS URL object. Be mindful of the JS<->Native boundary
2413
// crossing that is required.
2414
286
MaybeLocal<Value> URL::ToObject(Environment* env) const {
2415
286
  Isolate* isolate = env->isolate();
2416
286
  Local<Context> context = env->context();
2417
  Context::Scope context_scope(context);
2418
2419
  const Local<Value> undef = Undefined(isolate);
2420
  const Local<Value> null = Null(isolate);
2421
2422
286
  if (context_.flags & URL_FLAGS_FAILED)
2423
    return Local<Value>();
2424
2425
  Local<Value> argv[] = {
2426
    undef,
2427
    undef,
2428
    undef,
2429
    undef,
2430
    null,  // host defaults to null
2431
    null,  // port defaults to null
2432
    undef,
2433
    null,  // query defaults to null
2434
    null,  // fragment defaults to null
2435
286
  };
2436
286
  SetArgs(env, argv, context_);
2437
2438
  MaybeLocal<Value> ret;
2439
  {
2440
572
    TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal);
2441
2442
    // The SetURLConstructor method must have been called already to
2443
    // set the constructor function used below. SetURLConstructor is
2444
    // called automatically when the internal/url.js module is loaded
2445
    // during the internal/bootstrap/node.js processing.
2446
572
    ret = env->url_constructor_function()
2447
572
        ->Call(env->context(), undef, arraysize(argv), argv);
2448
  }
2449
2450
286
  return ret;
2451
}
2452
2453
}  // namespace url
2454
}  // namespace node
2455
2456
4185
// Associates the `url` module name with node::url::Initialize.
// NOTE(review): the exact registration semantics live in the macro
// definition, which is not visible from this file.
NODE_MODULE_CONTEXT_AWARE_INTERNAL(url, node::url::Initialize)