GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
#include "string_decoder.h" // NOLINT(build/include_inline) |
||
2 |
#include "string_decoder-inl.h" |
||
3 |
|||
4 |
#include "env-inl.h" |
||
5 |
#include "node_buffer.h" |
||
6 |
#include "node_errors.h" |
||
7 |
#include "node_external_reference.h" |
||
8 |
#include "string_bytes.h" |
||
9 |
#include "util.h" |
||
10 |
|||
11 |
using v8::Array; |
||
12 |
using v8::ArrayBufferView; |
||
13 |
using v8::Context; |
||
14 |
using v8::FunctionCallbackInfo; |
||
15 |
using v8::Integer; |
||
16 |
using v8::Isolate; |
||
17 |
using v8::Local; |
||
18 |
using v8::MaybeLocal; |
||
19 |
using v8::Object; |
||
20 |
using v8::String; |
||
21 |
using v8::Value; |
||
22 |
|||
23 |
namespace node { |
||
24 |
|||
25 |
namespace { |
||
26 |
|||
27 |
✓✓ | 26334 |
MaybeLocal<String> MakeString(Isolate* isolate, |
28 |
const char* data, |
||
29 |
size_t length, |
||
30 |
enum encoding encoding) { |
||
31 |
Local<Value> error; |
||
32 |
MaybeLocal<Value> ret; |
||
33 |
✓✓ | 26334 |
if (encoding == UTF8) { |
34 |
MaybeLocal<String> utf8_string = String::NewFromUtf8( |
||
35 |
isolate, |
||
36 |
data, |
||
37 |
v8::NewStringType::kNormal, |
||
38 |
15245 |
length); |
|
39 |
✓✓ | 15245 |
if (utf8_string.IsEmpty()) { |
40 |
1 |
isolate->ThrowException(node::ERR_STRING_TOO_LONG(isolate)); |
|
41 |
1 |
return MaybeLocal<String>(); |
|
42 |
} else { |
||
43 |
15244 |
return utf8_string; |
|
44 |
} |
||
45 |
} else { |
||
46 |
ret = StringBytes::Encode( |
||
47 |
isolate, |
||
48 |
data, |
||
49 |
length, |
||
50 |
encoding, |
||
51 |
11089 |
&error); |
|
52 |
} |
||
53 |
|||
54 |
✗✓ | 11089 |
if (ret.IsEmpty()) { |
55 |
CHECK(!error.IsEmpty()); |
||
56 |
isolate->ThrowException(error); |
||
57 |
} |
||
58 |
|||
59 |
DCHECK(ret.IsEmpty() || ret.ToLocalChecked()->IsString()); |
||
60 |
22178 |
return ret.FromMaybe(Local<Value>()).As<String>(); |
|
61 |
} |
||
62 |
|||
63 |
} // anonymous namespace |
||
64 |
|||
65 |
|||
66 |
26757 |
MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate, |
|
67 |
const char* data, |
||
68 |
size_t* nread_ptr) { |
||
69 |
Local<String> prepend, body; |
||
70 |
|||
71 |
26757 |
size_t nread = *nread_ptr; |
|
72 |
|||
73 |
✓✓ | 37747 |
if (Encoding() == UTF8 || |
74 |
✓✓ | 18216 |
Encoding() == UCS2 || |
75 |
✓✓✓✓ ✓✓ |
44973 |
Encoding() == BASE64 || |
76 |
6321 |
Encoding() == BASE64URL) { |
|
77 |
// See if we want bytes to finish a character from the previous |
||
78 |
// chunk; if so, copy the new bytes to the missing bytes buffer |
||
79 |
// and create a small string from it that is to be prepended to the |
||
80 |
// main body. |
||
81 |
✓✓ | 21286 |
if (MissingBytes() > 0) { |
82 |
// There are never more bytes missing than the pre-calculated maximum. |
||
83 |
✗✓ | 4176 |
CHECK_LE(MissingBytes() + BufferedBytes(), |
84 |
kIncompleteCharactersEnd); |
||
85 |
✓✓ | 4176 |
if (Encoding() == UTF8) { |
86 |
// For UTF-8, we need special treatment to align with the V8 decoder: |
||
87 |
// If an incomplete character is found at a chunk boundary, we use |
||
88 |
// its remainder and pass it to V8 as-is. |
||
89 |
✓✓✓✓ ✓✓ |
3362 |
for (size_t i = 0; i < nread && i < MissingBytes(); ++i) { |
90 |
✓✓ | 1909 |
if ((data[i] & 0xC0) != 0x80) { |
91 |
// This byte is not a continuation byte even though it should have |
||
92 |
// been one. We stop decoding of the incomplete character at this |
||
93 |
// point (but still use the rest of the incomplete bytes from this |
||
94 |
// chunk) and assume that the new, unexpected byte starts a new one. |
||
95 |
152 |
state_[kMissingBytes] = 0; |
|
96 |
152 |
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i); |
|
97 |
152 |
state_[kBufferedBytes] += i; |
|
98 |
152 |
data += i; |
|
99 |
152 |
nread -= i; |
|
100 |
152 |
break; |
|
101 |
} |
||
102 |
} |
||
103 |
} |
||
104 |
|||
105 |
size_t found_bytes = |
||
106 |
4176 |
std::min(nread, static_cast<size_t>(MissingBytes())); |
|
107 |
4176 |
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), |
|
108 |
data, |
||
109 |
found_bytes); |
||
110 |
// Adjust the two buffers. |
||
111 |
4176 |
data += found_bytes; |
|
112 |
4176 |
nread -= found_bytes; |
|
113 |
|||
114 |
4176 |
state_[kMissingBytes] -= found_bytes; |
|
115 |
4176 |
state_[kBufferedBytes] += found_bytes; |
|
116 |
|||
117 |
✓✓ | 4176 |
if (LIKELY(MissingBytes() == 0)) { |
118 |
// If no more bytes are missing, create a small string that we |
||
119 |
// will later prepend. |
||
120 |
3751 |
if (!MakeString(isolate, |
|
121 |
3751 |
IncompleteCharacterBuffer(), |
|
122 |
3751 |
BufferedBytes(), |
|
123 |
✗✓ | 11253 |
Encoding()).ToLocal(&prepend)) { |
124 |
return MaybeLocal<String>(); |
||
125 |
} |
||
126 |
|||
127 |
3751 |
*nread_ptr += BufferedBytes(); |
|
128 |
// No more buffered bytes. |
||
129 |
3751 |
state_[kBufferedBytes] = 0; |
|
130 |
} |
||
131 |
} |
||
132 |
|||
133 |
// It could be that trying to finish the previous chunk already |
||
134 |
// consumed all data that we received in this chunk. |
||
135 |
✓✓ | 21286 |
if (UNLIKELY(nread == 0)) { |
136 |
✓✓ | 2791 |
body = !prepend.IsEmpty() ? prepend : String::Empty(isolate); |
137 |
2303 |
prepend = Local<String>(); |
|
138 |
} else { |
||
139 |
// If not, that means is no character left to finish at this point. |
||
140 |
DCHECK_EQ(MissingBytes(), 0); |
||
141 |
DCHECK_EQ(BufferedBytes(), 0); |
||
142 |
|||
143 |
// See whether there is a character that we may have to cut off and |
||
144 |
// finish when receiving the next chunk. |
||
145 |
✓✓✓✓ ✓✓ |
18983 |
if (Encoding() == UTF8 && data[nread - 1] & 0x80) { |
146 |
// This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte. |
||
147 |
// This means we'll need to figure out where the character to which |
||
148 |
// the byte belongs begins. |
||
149 |
1979 |
for (size_t i = nread - 1; ; --i) { |
|
150 |
DCHECK_LT(i, nread); |
||
151 |
1164 |
state_[kBufferedBytes]++; |
|
152 |
✓✓ | 3143 |
if ((data[i] & 0xC0) == 0x80) { |
153 |
// This byte does not start a character (a "trailing" byte). |
||
154 |
✓✓✓✓ |
1217 |
if (state_[kBufferedBytes] >= 4 || i == 0) { |
155 |
// We either have more then 4 trailing bytes (which means |
||
156 |
// the current character would not be inside the range for |
||
157 |
// valid Unicode, and in particular cannot be represented |
||
158 |
// through JavaScript's UTF-16-based approach to strings), or the |
||
159 |
// current buffer does not contain the start of an UTF-8 character |
||
160 |
// at all. Either way, this is invalid UTF8 and we can just |
||
161 |
// let the engine's decoder handle it. |
||
162 |
53 |
state_[kBufferedBytes] = 0; |
|
163 |
53 |
break; |
|
164 |
} |
||
165 |
} else { |
||
166 |
// Found the first byte of a UTF-8 character. By looking at the |
||
167 |
// upper bits we can tell how long the character *should* be. |
||
168 |
✓✓ | 1926 |
if ((data[i] & 0xE0) == 0xC0) { |
169 |
573 |
state_[kMissingBytes] = 2; |
|
170 |
✓✓ | 1353 |
} else if ((data[i] & 0xF0) == 0xE0) { |
171 |
1143 |
state_[kMissingBytes] = 3; |
|
172 |
✓✓ | 210 |
} else if ((data[i] & 0xF8) == 0xF0) { |
173 |
53 |
state_[kMissingBytes] = 4; |
|
174 |
} else { |
||
175 |
// This lead byte would indicate a character outside of the |
||
176 |
// representable range. |
||
177 |
157 |
state_[kBufferedBytes] = 0; |
|
178 |
157 |
break; |
|
179 |
} |
||
180 |
|||
181 |
✓✓ | 1769 |
if (BufferedBytes() >= MissingBytes()) { |
182 |
// Received more or exactly as many trailing bytes than the lead |
||
183 |
// character would indicate. In the "==" case, we have valid |
||
184 |
// data and don't need to slice anything off; |
||
185 |
// in the ">" case, this is invalid UTF-8 anyway. |
||
186 |
404 |
state_[kMissingBytes] = 0; |
|
187 |
404 |
state_[kBufferedBytes] = 0; |
|
188 |
} |
||
189 |
|||
190 |
1769 |
state_[kMissingBytes] -= state_[kBufferedBytes]; |
|
191 |
1769 |
break; |
|
192 |
} |
||
193 |
} |
||
194 |
✓✓ | 17004 |
} else if (Encoding() == UCS2) { |
195 |
✓✓ | 2828 |
if ((nread % 2) == 1) { |
196 |
// We got half a codepoint, and need the second byte of it. |
||
197 |
1735 |
state_[kBufferedBytes] = 1; |
|
198 |
1735 |
state_[kMissingBytes] = 1; |
|
199 |
✓✓ | 1093 |
} else if ((data[nread - 1] & 0xFC) == 0xD8) { |
200 |
// Half a split UTF-16 character. |
||
201 |
10 |
state_[kBufferedBytes] = 2; |
|
202 |
10 |
state_[kMissingBytes] = 2; |
|
203 |
} |
||
204 |
✓✓✓✓ ✓✓ |
14176 |
} else if (Encoding() == BASE64 || Encoding() == BASE64URL) { |
205 |
1503 |
state_[kBufferedBytes] = nread % 3; |
|
206 |
✓✓ | 1503 |
if (state_[kBufferedBytes] > 0) |
207 |
1031 |
state_[kMissingBytes] = 3 - BufferedBytes(); |
|
208 |
} |
||
209 |
|||
210 |
✓✓ | 18983 |
if (BufferedBytes() > 0) { |
211 |
// Copy the requested number of buffered bytes from the end of the |
||
212 |
// input into the incomplete character buffer. |
||
213 |
4141 |
nread -= BufferedBytes(); |
|
214 |
4141 |
*nread_ptr -= BufferedBytes(); |
|
215 |
4141 |
memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes()); |
|
216 |
} |
||
217 |
|||
218 |
✓✓ | 18983 |
if (nread > 0) { |
219 |
✓✓ | 33616 |
if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body)) |
220 |
1 |
return MaybeLocal<String>(); |
|
221 |
} else { |
||
222 |
2175 |
body = String::Empty(isolate); |
|
223 |
} |
||
224 |
} |
||
225 |
|||
226 |
✓✓ | 21285 |
if (prepend.IsEmpty()) { |
227 |
19349 |
return body; |
|
228 |
} else { |
||
229 |
3872 |
return String::Concat(isolate, prepend, body); |
|
230 |
} |
||
231 |
} else { |
||
232 |
✓✓✓✓ ✓✓✗✓ ✗✓ |
5471 |
CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1); |
233 |
5471 |
return MakeString(isolate, data, nread, Encoding()); |
|
234 |
} |
||
235 |
} |
||
236 |
|||
237 |
390 |
MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) { |
|
238 |
✓✗✓✗ ✗✓✗✓ |
390 |
if (Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1) { |
239 |
CHECK_EQ(MissingBytes(), 0); |
||
240 |
CHECK_EQ(BufferedBytes(), 0); |
||
241 |
} |
||
242 |
|||
243 |
✓✓✓✓ ✓✓ |
390 |
if (Encoding() == UCS2 && BufferedBytes() % 2 == 1) { |
244 |
// Ignore a single trailing byte, like the JS decoder does. |
||
245 |
87 |
state_[kMissingBytes]--; |
|
246 |
87 |
state_[kBufferedBytes]--; |
|
247 |
} |
||
248 |
|||
249 |
✓✓ | 390 |
if (BufferedBytes() == 0) |
250 |
86 |
return String::Empty(isolate); |
|
251 |
|||
252 |
MaybeLocal<String> ret = |
||
253 |
MakeString(isolate, |
||
254 |
304 |
IncompleteCharacterBuffer(), |
|
255 |
304 |
BufferedBytes(), |
|
256 |
608 |
Encoding()); |
|
257 |
|||
258 |
304 |
state_[kMissingBytes] = 0; |
|
259 |
304 |
state_[kBufferedBytes] = 0; |
|
260 |
|||
261 |
304 |
return ret; |
|
262 |
} |
||
263 |
|||
264 |
namespace { |
||
265 |
|||
266 |
✓✗ | 26757 |
void DecodeData(const FunctionCallbackInfo<Value>& args) { |
267 |
StringDecoder* decoder = |
||
268 |
26757 |
reinterpret_cast<StringDecoder*>(Buffer::Data(args[0])); |
|
269 |
✗✓ | 26757 |
CHECK_NOT_NULL(decoder); |
270 |
|||
271 |
✗✓ | 26757 |
CHECK(args[1]->IsArrayBufferView()); |
272 |
53514 |
ArrayBufferViewContents<char> content(args[1].As<ArrayBufferView>()); |
|
273 |
26757 |
size_t length = content.length(); |
|
274 |
|||
275 |
MaybeLocal<String> ret = |
||
276 |
53514 |
decoder->DecodeData(args.GetIsolate(), content.data(), &length); |
|
277 |
✓✓ | 26757 |
if (!ret.IsEmpty()) |
278 |
✗✓ | 53512 |
args.GetReturnValue().Set(ret.ToLocalChecked()); |
279 |
26757 |
} |
|
280 |
|||
281 |
✓✗ | 390 |
void FlushData(const FunctionCallbackInfo<Value>& args) { |
282 |
StringDecoder* decoder = |
||
283 |
390 |
reinterpret_cast<StringDecoder*>(Buffer::Data(args[0])); |
|
284 |
✗✓ | 390 |
CHECK_NOT_NULL(decoder); |
285 |
390 |
MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate()); |
|
286 |
✓✗ | 390 |
if (!ret.IsEmpty()) |
287 |
✗✓ | 780 |
args.GetReturnValue().Set(ret.ToLocalChecked()); |
288 |
390 |
} |
|
289 |
|||
290 |
784 |
void InitializeStringDecoder(Local<Object> target, |
|
291 |
Local<Value> unused, |
||
292 |
Local<Context> context, |
||
293 |
void* priv) { |
||
294 |
784 |
Environment* env = Environment::GetCurrent(context); |
|
295 |
784 |
Isolate* isolate = env->isolate(); |
|
296 |
|||
297 |
#define SET_DECODER_CONSTANT(name) \ |
||
298 |
target->Set(context, \ |
||
299 |
FIXED_ONE_BYTE_STRING(isolate, #name), \ |
||
300 |
Integer::New(isolate, StringDecoder::name)).FromJust() |
||
301 |
|||
302 |
3136 |
SET_DECODER_CONSTANT(kIncompleteCharactersStart); |
|
303 |
3136 |
SET_DECODER_CONSTANT(kIncompleteCharactersEnd); |
|
304 |
3136 |
SET_DECODER_CONSTANT(kMissingBytes); |
|
305 |
3136 |
SET_DECODER_CONSTANT(kBufferedBytes); |
|
306 |
3136 |
SET_DECODER_CONSTANT(kEncodingField); |
|
307 |
2352 |
SET_DECODER_CONSTANT(kNumFields); |
|
308 |
|||
309 |
784 |
Local<Array> encodings = Array::New(isolate); |
|
310 |
#define ADD_TO_ENCODINGS_ARRAY(cname, jsname) \ |
||
311 |
encodings->Set(context, \ |
||
312 |
static_cast<int32_t>(cname), \ |
||
313 |
FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust() |
||
314 |
2352 |
ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii"); |
|
315 |
2352 |
ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8"); |
|
316 |
2352 |
ADD_TO_ENCODINGS_ARRAY(BASE64, "base64"); |
|
317 |
2352 |
ADD_TO_ENCODINGS_ARRAY(BASE64URL, "base64url"); |
|
318 |
2352 |
ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le"); |
|
319 |
2352 |
ADD_TO_ENCODINGS_ARRAY(HEX, "hex"); |
|
320 |
2352 |
ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer"); |
|
321 |
2352 |
ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1"); |
|
322 |
|||
323 |
784 |
target->Set(context, |
|
324 |
FIXED_ONE_BYTE_STRING(isolate, "encodings"), |
||
325 |
2352 |
encodings).Check(); |
|
326 |
|||
327 |
784 |
target->Set(context, |
|
328 |
FIXED_ONE_BYTE_STRING(isolate, "kSize"), |
||
329 |
2352 |
Integer::New(isolate, sizeof(StringDecoder))).Check(); |
|
330 |
|||
331 |
784 |
SetMethod(context, target, "decode", DecodeData); |
|
332 |
784 |
SetMethod(context, target, "flush", FlushData); |
|
333 |
784 |
} |
|
334 |
|||
335 |
} // anonymous namespace |
||
336 |
|||
337 |
5527 |
void RegisterStringDecoderExternalReferences( |
|
338 |
ExternalReferenceRegistry* registry) { |
||
339 |
5527 |
registry->Register(DecodeData); |
|
340 |
5527 |
registry->Register(FlushData); |
|
341 |
5527 |
} |
|
342 |
|||
343 |
} // namespace node |
||
344 |
|||
345 |
5597 |
NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder, |
|
346 |
node::InitializeStringDecoder) |
||
347 |
5527 |
NODE_MODULE_EXTERNAL_REFERENCE(string_decoder, |
|
348 |
node::RegisterStringDecoderExternalReferences) |
Generated by: GCOVR (Version 4.2) |