GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
#include "string_decoder.h" // NOLINT(build/include_inline) |
||
2 |
#include "string_decoder-inl.h" |
||
3 |
|||
4 |
#include "env-inl.h" |
||
5 |
#include "node_buffer.h" |
||
6 |
#include "node_external_reference.h" |
||
7 |
#include "string_bytes.h" |
||
8 |
#include "util.h" |
||
9 |
|||
10 |
using v8::Array; |
||
11 |
using v8::ArrayBufferView; |
||
12 |
using v8::Context; |
||
13 |
using v8::FunctionCallbackInfo; |
||
14 |
using v8::Integer; |
||
15 |
using v8::Isolate; |
||
16 |
using v8::Local; |
||
17 |
using v8::MaybeLocal; |
||
18 |
using v8::Object; |
||
19 |
using v8::String; |
||
20 |
using v8::Value; |
||
21 |
|||
22 |
namespace node { |
||
23 |
|||
24 |
namespace { |
||
25 |
|||
26 |
25596 |
MaybeLocal<String> MakeString(Isolate* isolate, |
|
27 |
const char* data, |
||
28 |
size_t length, |
||
29 |
enum encoding encoding) { |
||
30 |
Local<Value> error; |
||
31 |
MaybeLocal<Value> ret; |
||
32 |
✓✓ | 25596 |
if (encoding == UTF8) { |
33 |
return String::NewFromUtf8( |
||
34 |
isolate, |
||
35 |
data, |
||
36 |
v8::NewStringType::kNormal, |
||
37 |
14699 |
length); |
|
38 |
} else { |
||
39 |
ret = StringBytes::Encode( |
||
40 |
isolate, |
||
41 |
data, |
||
42 |
length, |
||
43 |
encoding, |
||
44 |
10897 |
&error); |
|
45 |
} |
||
46 |
|||
47 |
✗✓ | 10897 |
if (ret.IsEmpty()) { |
48 |
CHECK(!error.IsEmpty()); |
||
49 |
isolate->ThrowException(error); |
||
50 |
} |
||
51 |
|||
52 |
DCHECK(ret.IsEmpty() || ret.ToLocalChecked()->IsString()); |
||
53 |
21794 |
return ret.FromMaybe(Local<Value>()).As<String>(); |
|
54 |
} |
||
55 |
|||
56 |
} // anonymous namespace |
||
57 |
|||
58 |
|||
59 |
26323 |
MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate, |
|
60 |
const char* data, |
||
61 |
size_t* nread_ptr) { |
||
62 |
Local<String> prepend, body; |
||
63 |
|||
64 |
26323 |
size_t nread = *nread_ptr; |
|
65 |
|||
66 |
✓✓✓✓ ✓✓✓✓ |
26323 |
if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) { |
67 |
// See if we want bytes to finish a character from the previous |
||
68 |
// chunk; if so, copy the new bytes to the missing bytes buffer |
||
69 |
// and create a small string from it that is to be prepended to the |
||
70 |
// main body. |
||
71 |
✓✓ | 19844 |
if (MissingBytes() > 0) { |
72 |
// There are never more bytes missing than the pre-calculated maximum. |
||
73 |
✗✓ | 3688 |
CHECK_LE(MissingBytes() + BufferedBytes(), |
74 |
kIncompleteCharactersEnd); |
||
75 |
✓✓ | 3688 |
if (Encoding() == UTF8) { |
76 |
// For UTF-8, we need special treatment to align with the V8 decoder: |
||
77 |
// If an incomplete character is found at a chunk boundary, we use |
||
78 |
// its remainder and pass it to V8 as-is. |
||
79 |
✓✓✓✓ ✓✓ |
3334 |
for (size_t i = 0; i < nread && i < MissingBytes(); ++i) { |
80 |
✓✓ | 1883 |
if ((data[i] & 0xC0) != 0x80) { |
81 |
// This byte is not a continuation byte even though it should have |
||
82 |
// been one. We stop decoding of the incomplete character at this |
||
83 |
// point (but still use the rest of the incomplete bytes from this |
||
84 |
// chunk) and assume that the new, unexpected byte starts a new one. |
||
85 |
135 |
state_[kMissingBytes] = 0; |
|
86 |
135 |
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i); |
|
87 |
135 |
state_[kBufferedBytes] += i; |
|
88 |
135 |
data += i; |
|
89 |
135 |
nread -= i; |
|
90 |
135 |
break; |
|
91 |
} |
||
92 |
} |
||
93 |
} |
||
94 |
|||
95 |
size_t found_bytes = |
||
96 |
3688 |
std::min(nread, static_cast<size_t>(MissingBytes())); |
|
97 |
3688 |
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), |
|
98 |
data, |
||
99 |
3688 |
found_bytes); |
|
100 |
// Adjust the two buffers. |
||
101 |
3688 |
data += found_bytes; |
|
102 |
3688 |
nread -= found_bytes; |
|
103 |
|||
104 |
3688 |
state_[kMissingBytes] -= found_bytes; |
|
105 |
3688 |
state_[kBufferedBytes] += found_bytes; |
|
106 |
|||
107 |
✓✓ | 3688 |
if (LIKELY(MissingBytes() == 0)) { |
108 |
// If no more bytes are missing, create a small string that we |
||
109 |
// will later prepend. |
||
110 |
✗✓ | 6656 |
if (!MakeString(isolate, |
111 |
3328 |
IncompleteCharacterBuffer(), |
|
112 |
3328 |
BufferedBytes(), |
|
113 |
9984 |
Encoding()).ToLocal(&prepend)) { |
|
114 |
return MaybeLocal<String>(); |
||
115 |
} |
||
116 |
|||
117 |
3328 |
*nread_ptr += BufferedBytes(); |
|
118 |
// No more buffered bytes. |
||
119 |
3328 |
state_[kBufferedBytes] = 0; |
|
120 |
} |
||
121 |
} |
||
122 |
|||
123 |
// It could be that trying to finish the previous chunk already |
||
124 |
// consumed all data that we received in this chunk. |
||
125 |
✓✓ | 19844 |
if (UNLIKELY(nread == 0)) { |
126 |
✓✓ | 2565 |
body = !prepend.IsEmpty() ? prepend : String::Empty(isolate); |
127 |
2162 |
prepend = Local<String>(); |
|
128 |
} else { |
||
129 |
// If not, that means there is no character left to finish at this point. |
||
130 |
DCHECK_EQ(MissingBytes(), 0); |
||
131 |
DCHECK_EQ(BufferedBytes(), 0); |
||
132 |
|||
133 |
// See whether there is a character that we may have to cut off and |
||
134 |
// finish when receiving the next chunk. |
||
135 |
✓✓✓✓ ✓✓ |
17682 |
if (Encoding() == UTF8 && data[nread - 1] & 0x80) { |
136 |
// This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte. |
||
137 |
// This means we'll need to figure out where the character to which |
||
138 |
// the byte belongs begins. |
||
139 |
3013 |
for (size_t i = nread - 1; ; --i) { |
|
140 |
DCHECK_LT(i, nread); |
||
141 |
4110 |
state_[kBufferedBytes]++; |
|
142 |
✓✓ | 3013 |
if ((data[i] & 0xC0) == 0x80) { |
143 |
// This byte does not start a character (a "trailing" byte). |
||
144 |
✓✓✓✓ |
1149 |
if (state_[kBufferedBytes] >= 4 || i == 0) { |
145 |
// We either have 4 or more trailing bytes (which means |
||
146 |
// the current character would not be inside the range for |
||
147 |
// valid Unicode, and in particular cannot be represented |
||
148 |
// through JavaScript's UTF-16-based approach to strings), or the |
||
149 |
// current buffer does not contain the start of a UTF-8 character |
||
150 |
// at all. Either way, this is invalid UTF8 and we can just |
||
151 |
// let the engine's decoder handle it. |
||
152 |
52 |
state_[kBufferedBytes] = 0; |
|
153 |
52 |
break; |
|
154 |
} |
||
155 |
} else { |
||
156 |
// Found the first byte of a UTF-8 character. By looking at the |
||
157 |
// upper bits we can tell how long the character *should* be. |
||
158 |
✓✓ | 1864 |
if ((data[i] & 0xE0) == 0xC0) { |
159 |
554 |
state_[kMissingBytes] = 2; |
|
160 |
✓✓ | 1310 |
} else if ((data[i] & 0xF0) == 0xE0) { |
161 |
1135 |
state_[kMissingBytes] = 3; |
|
162 |
✓✓ | 175 |
} else if ((data[i] & 0xF8) == 0xF0) { |
163 |
53 |
state_[kMissingBytes] = 4; |
|
164 |
} else { |
||
165 |
// This lead byte would indicate a character outside of the |
||
166 |
// representable range. |
||
167 |
122 |
state_[kBufferedBytes] = 0; |
|
168 |
122 |
break; |
|
169 |
} |
||
170 |
|||
171 |
✓✓ | 1742 |
if (BufferedBytes() >= MissingBytes()) { |
172 |
// Received at least as many trailing bytes as the lead |
||
173 |
// character would indicate. In the "==" case, we have valid |
||
174 |
// data and don't need to slice anything off; |
||
175 |
// in the ">" case, this is invalid UTF-8 anyway. |
||
176 |
400 |
state_[kMissingBytes] = 0; |
|
177 |
400 |
state_[kBufferedBytes] = 0; |
|
178 |
} |
||
179 |
|||
180 |
1742 |
state_[kMissingBytes] -= state_[kBufferedBytes]; |
|
181 |
1742 |
break; |
|
182 |
} |
||
183 |
} |
||
184 |
✓✓ | 15766 |
} else if (Encoding() == UCS2) { |
185 |
✓✓ | 2854 |
if ((nread % 2) == 1) { |
186 |
// We got half a codepoint, and need the second byte of it. |
||
187 |
1772 |
state_[kBufferedBytes] = 1; |
|
188 |
1772 |
state_[kMissingBytes] = 1; |
|
189 |
✓✓ | 1082 |
} else if ((data[nread - 1] & 0xFC) == 0xD8) { |
190 |
// Half a split UTF-16 character. |
||
191 |
10 |
state_[kBufferedBytes] = 2; |
|
192 |
10 |
state_[kMissingBytes] = 2; |
|
193 |
} |
||
194 |
✓✓ | 12912 |
} else if (Encoding() == BASE64) { |
195 |
702 |
state_[kBufferedBytes] = nread % 3; |
|
196 |
✓✓ | 702 |
if (state_[kBufferedBytes] > 0) |
197 |
482 |
state_[kMissingBytes] = 3 - BufferedBytes(); |
|
198 |
} |
||
199 |
|||
200 |
✓✓ | 17682 |
if (BufferedBytes() > 0) { |
201 |
// Copy the requested number of buffered bytes from the end of the |
||
202 |
// input into the incomplete character buffer. |
||
203 |
3606 |
nread -= BufferedBytes(); |
|
204 |
3606 |
*nread_ptr -= BufferedBytes(); |
|
205 |
3606 |
memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes()); |
|
206 |
} |
||
207 |
|||
208 |
✓✓ | 17682 |
if (nread > 0) { |
209 |
✗✓ | 31226 |
if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body)) |
210 |
return MaybeLocal<String>(); |
||
211 |
} else { |
||
212 |
2069 |
body = String::Empty(isolate); |
|
213 |
} |
||
214 |
} |
||
215 |
|||
216 |
✓✓ | 19844 |
if (prepend.IsEmpty()) { |
217 |
18275 |
return body; |
|
218 |
} else { |
||
219 |
3138 |
return String::Concat(isolate, prepend, body); |
|
220 |
} |
||
221 |
} else { |
||
222 |
✓✓✓✓ ✓✓✗✓ ✗✓ |
6479 |
CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1); |
223 |
6479 |
return MakeString(isolate, data, nread, Encoding()); |
|
224 |
} |
||
225 |
} |
||
226 |
|||
227 |
278 |
MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) { |
|
228 |
✓✗✓✗ ✗✓✗✓ |
278 |
if (Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1) { |
229 |
CHECK_EQ(MissingBytes(), 0); |
||
230 |
CHECK_EQ(BufferedBytes(), 0); |
||
231 |
} |
||
232 |
|||
233 |
✓✓✓✓ ✓✓ |
278 |
if (Encoding() == UCS2 && BufferedBytes() % 2 == 1) { |
234 |
// Ignore a single trailing byte, like the JS decoder does. |
||
235 |
103 |
state_[kMissingBytes]--; |
|
236 |
103 |
state_[kBufferedBytes]--; |
|
237 |
} |
||
238 |
|||
239 |
✓✓ | 278 |
if (BufferedBytes() == 0) |
240 |
102 |
return String::Empty(isolate); |
|
241 |
|||
242 |
MaybeLocal<String> ret = |
||
243 |
MakeString(isolate, |
||
244 |
176 |
IncompleteCharacterBuffer(), |
|
245 |
176 |
BufferedBytes(), |
|
246 |
528 |
Encoding()); |
|
247 |
|||
248 |
176 |
state_[kMissingBytes] = 0; |
|
249 |
176 |
state_[kBufferedBytes] = 0; |
|
250 |
|||
251 |
176 |
return ret; |
|
252 |
} |
||
253 |
|||
254 |
namespace { |
||
255 |
|||
256 |
26323 |
void DecodeData(const FunctionCallbackInfo<Value>& args) { |
|
257 |
StringDecoder* decoder = |
||
258 |
26323 |
reinterpret_cast<StringDecoder*>(Buffer::Data(args[0])); |
|
259 |
✗✓ | 26323 |
CHECK_NOT_NULL(decoder); |
260 |
|||
261 |
✗✓ | 52646 |
CHECK(args[1]->IsArrayBufferView()); |
262 |
52646 |
ArrayBufferViewContents<char> content(args[1].As<ArrayBufferView>()); |
|
263 |
26323 |
size_t length = content.length(); |
|
264 |
|||
265 |
MaybeLocal<String> ret = |
||
266 |
52646 |
decoder->DecodeData(args.GetIsolate(), content.data(), &length); |
|
267 |
✓✗ | 26323 |
if (!ret.IsEmpty()) |
268 |
52646 |
args.GetReturnValue().Set(ret.ToLocalChecked()); |
|
269 |
26323 |
} |
|
270 |
|||
271 |
278 |
void FlushData(const FunctionCallbackInfo<Value>& args) { |
|
272 |
StringDecoder* decoder = |
||
273 |
278 |
reinterpret_cast<StringDecoder*>(Buffer::Data(args[0])); |
|
274 |
✗✓ | 278 |
CHECK_NOT_NULL(decoder); |
275 |
278 |
MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate()); |
|
276 |
✓✗ | 278 |
if (!ret.IsEmpty()) |
277 |
556 |
args.GetReturnValue().Set(ret.ToLocalChecked()); |
|
278 |
278 |
} |
|
279 |
|||
280 |
445 |
void InitializeStringDecoder(Local<Object> target, |
|
281 |
Local<Value> unused, |
||
282 |
Local<Context> context, |
||
283 |
void* priv) { |
||
284 |
445 |
Environment* env = Environment::GetCurrent(context); |
|
285 |
445 |
Isolate* isolate = env->isolate(); |
|
286 |
|||
287 |
#define SET_DECODER_CONSTANT(name) \ |
||
288 |
target->Set(context, \ |
||
289 |
FIXED_ONE_BYTE_STRING(isolate, #name), \ |
||
290 |
Integer::New(isolate, StringDecoder::name)).FromJust() |
||
291 |
|||
292 |
1780 |
SET_DECODER_CONSTANT(kIncompleteCharactersStart); |
|
293 |
1780 |
SET_DECODER_CONSTANT(kIncompleteCharactersEnd); |
|
294 |
1780 |
SET_DECODER_CONSTANT(kMissingBytes); |
|
295 |
1780 |
SET_DECODER_CONSTANT(kBufferedBytes); |
|
296 |
1780 |
SET_DECODER_CONSTANT(kEncodingField); |
|
297 |
1780 |
SET_DECODER_CONSTANT(kNumFields); |
|
298 |
|||
299 |
445 |
Local<Array> encodings = Array::New(isolate); |
|
300 |
#define ADD_TO_ENCODINGS_ARRAY(cname, jsname) \ |
||
301 |
encodings->Set(context, \ |
||
302 |
static_cast<int32_t>(cname), \ |
||
303 |
FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust() |
||
304 |
1335 |
ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii"); |
|
305 |
1335 |
ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8"); |
|
306 |
1335 |
ADD_TO_ENCODINGS_ARRAY(BASE64, "base64"); |
|
307 |
1335 |
ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le"); |
|
308 |
1335 |
ADD_TO_ENCODINGS_ARRAY(HEX, "hex"); |
|
309 |
1335 |
ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer"); |
|
310 |
1335 |
ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1"); |
|
311 |
|||
312 |
890 |
target->Set(context, |
|
313 |
FIXED_ONE_BYTE_STRING(isolate, "encodings"), |
||
314 |
1335 |
encodings).Check(); |
|
315 |
|||
316 |
890 |
target->Set(context, |
|
317 |
FIXED_ONE_BYTE_STRING(isolate, "kSize"), |
||
318 |
1780 |
Integer::New(isolate, sizeof(StringDecoder))).Check(); |
|
319 |
|||
320 |
445 |
env->SetMethod(target, "decode", DecodeData); |
|
321 |
445 |
env->SetMethod(target, "flush", FlushData); |
|
322 |
445 |
} |
|
323 |
|||
324 |
} // anonymous namespace |
||
325 |
|||
326 |
4601 |
void RegisterStringDecoderExternalReferences( |
|
327 |
ExternalReferenceRegistry* registry) { |
||
328 |
4601 |
registry->Register(DecodeData); |
|
329 |
4601 |
registry->Register(FlushData); |
|
330 |
4601 |
} |
|
331 |
|||
332 |
} // namespace node |
||
333 |
|||
334 |
4670 |
NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder, |
|
335 |
node::InitializeStringDecoder) |
||
336 |
✓✗✓✗ |
18635 |
NODE_MODULE_EXTERNAL_REFERENCE(string_decoder, |
337 |
node::RegisterStringDecoderExternalReferences) |
Generated by: GCOVR (Version 3.4) |