GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/string_decoder.cc Lines: 145 150 96.7 %
Date: 2021-01-16 04:10:54 Branches: 99 120 82.5 %

Line Branch Exec Source
1
#include "string_decoder.h"  // NOLINT(build/include_inline)
2
#include "string_decoder-inl.h"
3
4
#include "env-inl.h"
5
#include "node_buffer.h"
6
#include "node_errors.h"
7
#include "node_external_reference.h"
8
#include "string_bytes.h"
9
#include "util.h"
10
11
using v8::Array;
12
using v8::ArrayBufferView;
13
using v8::Context;
14
using v8::FunctionCallbackInfo;
15
using v8::Integer;
16
using v8::Isolate;
17
using v8::Local;
18
using v8::MaybeLocal;
19
using v8::Object;
20
using v8::String;
21
using v8::Value;
22
23
namespace node {
24
25
namespace {
26
27
24148
MaybeLocal<String> MakeString(Isolate* isolate,
28
                              const char* data,
29
                              size_t length,
30
                              enum encoding encoding) {
31
  Local<Value> error;
32
  MaybeLocal<Value> ret;
33
24148
  if (encoding == UTF8) {
34
    MaybeLocal<String> utf8_string = String::NewFromUtf8(
35
        isolate,
36
        data,
37
        v8::NewStringType::kNormal,
38
13492
        length);
39
13492
    if (utf8_string.IsEmpty()) {
40
1
      isolate->ThrowException(node::ERR_STRING_TOO_LONG(isolate));
41
1
      return MaybeLocal<String>();
42
    } else {
43
13491
      return utf8_string;
44
    }
45
  } else {
46
    ret = StringBytes::Encode(
47
        isolate,
48
        data,
49
        length,
50
        encoding,
51
10656
        &error);
52
  }
53
54
10656
  if (ret.IsEmpty()) {
55
    CHECK(!error.IsEmpty());
56
    isolate->ThrowException(error);
57
  }
58
59
  DCHECK(ret.IsEmpty() || ret.ToLocalChecked()->IsString());
60
21312
  return ret.FromMaybe(Local<Value>()).As<String>();
61
}
62
63
}  // anonymous namespace
64
65
66
24942
MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
67
                                             const char* data,
68
                                             size_t* nread_ptr) {
69
  Local<String> prepend, body;
70
71
24942
  size_t nread = *nread_ptr;
72
73


24942
  if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
74
    // See if we want bytes to finish a character from the previous
75
    // chunk; if so, copy the new bytes to the missing bytes buffer
76
    // and create a small string from it that is to be prepended to the
77
    // main body.
78
18604
    if (MissingBytes() > 0) {
79
      // There are never more bytes missing than the pre-calculated maximum.
80
3652
      CHECK_LE(MissingBytes() + BufferedBytes(),
81
               kIncompleteCharactersEnd);
82
3652
      if (Encoding() == UTF8) {
83
        // For UTF-8, we need special treatment to align with the V8 decoder:
84
        // If an incomplete character is found at a chunk boundary, we use
85
        // its remainder and pass it to V8 as-is.
86

3330
        for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
87
1877
          if ((data[i] & 0xC0) != 0x80) {
88
            // This byte is not a continuation byte even though it should have
89
            // been one. We stop decoding of the incomplete character at this
90
            // point (but still use the rest of the incomplete bytes from this
91
            // chunk) and assume that the new, unexpected byte starts a new one.
92
123
            state_[kMissingBytes] = 0;
93
123
            memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
94
123
            state_[kBufferedBytes] += i;
95
123
            data += i;
96
123
            nread -= i;
97
123
            break;
98
          }
99
        }
100
      }
101
102
      size_t found_bytes =
103
3652
          std::min(nread, static_cast<size_t>(MissingBytes()));
104
3652
      memcpy(IncompleteCharacterBuffer() + BufferedBytes(),
105
             data,
106
3652
             found_bytes);
107
      // Adjust the two buffers.
108
3652
      data += found_bytes;
109
3652
      nread -= found_bytes;
110
111
3652
      state_[kMissingBytes] -= found_bytes;
112
3652
      state_[kBufferedBytes] += found_bytes;
113
114
3652
      if (LIKELY(MissingBytes() == 0)) {
115
        // If no more bytes are missing, create a small string that we
116
        // will later prepend.
117
6566
        if (!MakeString(isolate,
118
3283
                        IncompleteCharacterBuffer(),
119
3283
                        BufferedBytes(),
120
9849
                        Encoding()).ToLocal(&prepend)) {
121
          return MaybeLocal<String>();
122
        }
123
124
3283
        *nread_ptr += BufferedBytes();
125
        // No more buffered bytes.
126
3283
        state_[kBufferedBytes] = 0;
127
      }
128
    }
129
130
    // It could be that trying to finish the previous chunk already
131
    // consumed all data that we received in this chunk.
132
18604
    if (UNLIKELY(nread == 0)) {
133
2586
      body = !prepend.IsEmpty() ? prepend : String::Empty(isolate);
134
2172
      prepend = Local<String>();
135
    } else {
136
      // If not, that means there is no character left to finish at this point.
137
      DCHECK_EQ(MissingBytes(), 0);
138
      DCHECK_EQ(BufferedBytes(), 0);
139
140
      // See whether there is a character that we may have to cut off and
141
      // finish when receiving the next chunk.
142

16432
      if (Encoding() == UTF8 && data[nread - 1] & 0x80) {
143
        // This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte.
144
        // This means we'll need to figure out where the character to which
145
        // the byte belongs begins.
146
2968
        for (size_t i = nread - 1; ; --i) {
147
          DCHECK_LT(i, nread);
148
4045
          state_[kBufferedBytes]++;
149
2968
          if ((data[i] & 0xC0) == 0x80) {
150
            // This byte does not start a character (a "trailing" byte).
151

1127
            if (state_[kBufferedBytes] >= 4 || i == 0) {
152
              // We either have more than 4 trailing bytes (which means
153
              // the current character would not be inside the range for
154
              // valid Unicode, and in particular cannot be represented
155
              // through JavaScript's UTF-16-based approach to strings), or the
156
              // current buffer does not contain the start of a UTF-8 character
157
              // at all. Either way, this is invalid UTF8 and we can just
158
              // let the engine's decoder handle it.
159
50
              state_[kBufferedBytes] = 0;
160
50
              break;
161
            }
162
          } else {
163
            // Found the first byte of a UTF-8 character. By looking at the
164
            // upper bits we can tell how long the character *should* be.
165
1841
            if ((data[i] & 0xE0) == 0xC0) {
166
546
              state_[kMissingBytes] = 2;
167
1295
            } else if ((data[i] & 0xF0) == 0xE0) {
168
1123
              state_[kMissingBytes] = 3;
169
172
            } else if ((data[i] & 0xF8) == 0xF0) {
170
52
              state_[kMissingBytes] = 4;
171
            } else {
172
              // This lead byte would indicate a character outside of the
173
              // representable range.
174
120
              state_[kBufferedBytes] = 0;
175
120
              break;
176
            }
177
178
1721
            if (BufferedBytes() >= MissingBytes()) {
179
              // Received at least as many trailing bytes as the lead
180
              // character would indicate. In the "==" case, we have valid
181
              // data and don't need to slice anything off;
182
              // in the ">" case, this is invalid UTF-8 anyway.
183
399
              state_[kMissingBytes] = 0;
184
399
              state_[kBufferedBytes] = 0;
185
            }
186
187
1721
            state_[kMissingBytes] -= state_[kBufferedBytes];
188
1721
            break;
189
          }
190
        }
191
14541
      } else if (Encoding() == UCS2) {
192
2898
        if ((nread % 2) == 1) {
193
          // We got half a codepoint, and need the second byte of it.
194
1793
          state_[kBufferedBytes] = 1;
195
1793
          state_[kMissingBytes] = 1;
196
1105
        } else if ((data[nread - 1] & 0xFC) == 0xD8) {
197
          // Half a split UTF-16 character.
198
13
          state_[kBufferedBytes] = 2;
199
13
          state_[kMissingBytes] = 2;
200
        }
201
11643
      } else if (Encoding() == BASE64) {
202
595
        state_[kBufferedBytes] = nread % 3;
203
595
        if (state_[kBufferedBytes] > 0)
204
420
          state_[kMissingBytes] = 3 - BufferedBytes();
205
      }
206
207
16432
      if (BufferedBytes() > 0) {
208
        // Copy the requested number of buffered bytes from the end of the
209
        // input into the incomplete character buffer.
210
3548
        nread -= BufferedBytes();
211
3548
        *nread_ptr -= BufferedBytes();
212
3548
        memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes());
213
      }
214
215
16432
      if (nread > 0) {
216
28728
        if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body))
217
1
          return MaybeLocal<String>();
218
      } else {
219
2068
        body = String::Empty(isolate);
220
      }
221
    }
222
223
18603
    if (prepend.IsEmpty()) {
224
17078
      return body;
225
    } else {
226
3050
      return String::Concat(isolate, prepend, body);
227
    }
228
  } else {
229


6338
    CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1);
230
6338
    return MakeString(isolate, data, nread, Encoding());
231
  }
232
}
233
234
265
MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) {
235


265
  if (Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1) {
236
    CHECK_EQ(MissingBytes(), 0);
237
    CHECK_EQ(BufferedBytes(), 0);
238
  }
239
240

265
  if (Encoding() == UCS2 && BufferedBytes() % 2 == 1) {
241
    // Ignore a single trailing byte, like the JS decoder does.
242
103
    state_[kMissingBytes]--;
243
103
    state_[kBufferedBytes]--;
244
  }
245
246
265
  if (BufferedBytes() == 0)
247
102
    return String::Empty(isolate);
248
249
  MaybeLocal<String> ret =
250
      MakeString(isolate,
251
163
                 IncompleteCharacterBuffer(),
252
163
                 BufferedBytes(),
253
489
                 Encoding());
254
255
163
  state_[kMissingBytes] = 0;
256
163
  state_[kBufferedBytes] = 0;
257
258
163
  return ret;
259
}
260
261
namespace {
262
263
24942
void DecodeData(const FunctionCallbackInfo<Value>& args) {
264
  StringDecoder* decoder =
265
24942
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
266
24942
  CHECK_NOT_NULL(decoder);
267
268
49884
  CHECK(args[1]->IsArrayBufferView());
269
49884
  ArrayBufferViewContents<char> content(args[1].As<ArrayBufferView>());
270
24942
  size_t length = content.length();
271
272
  MaybeLocal<String> ret =
273
49884
      decoder->DecodeData(args.GetIsolate(), content.data(), &length);
274
24942
  if (!ret.IsEmpty())
275
49882
    args.GetReturnValue().Set(ret.ToLocalChecked());
276
24942
}
277
278
265
void FlushData(const FunctionCallbackInfo<Value>& args) {
279
  StringDecoder* decoder =
280
265
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
281
265
  CHECK_NOT_NULL(decoder);
282
265
  MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate());
283
265
  if (!ret.IsEmpty())
284
530
    args.GetReturnValue().Set(ret.ToLocalChecked());
285
265
}
286
287
439
void InitializeStringDecoder(Local<Object> target,
288
                             Local<Value> unused,
289
                             Local<Context> context,
290
                             void* priv) {
291
439
  Environment* env = Environment::GetCurrent(context);
292
439
  Isolate* isolate = env->isolate();
293
294
#define SET_DECODER_CONSTANT(name)                                            \
295
  target->Set(context,                                                        \
296
              FIXED_ONE_BYTE_STRING(isolate, #name),                          \
297
              Integer::New(isolate, StringDecoder::name)).FromJust()
298
299
1756
  SET_DECODER_CONSTANT(kIncompleteCharactersStart);
300
1756
  SET_DECODER_CONSTANT(kIncompleteCharactersEnd);
301
1756
  SET_DECODER_CONSTANT(kMissingBytes);
302
1756
  SET_DECODER_CONSTANT(kBufferedBytes);
303
1756
  SET_DECODER_CONSTANT(kEncodingField);
304
1756
  SET_DECODER_CONSTANT(kNumFields);
305
306
439
  Local<Array> encodings = Array::New(isolate);
307
#define ADD_TO_ENCODINGS_ARRAY(cname, jsname)                                 \
308
  encodings->Set(context,                                                     \
309
                 static_cast<int32_t>(cname),                                 \
310
                 FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust()
311
1317
  ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
312
1317
  ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
313
1317
  ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
314
1317
  ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
315
1317
  ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
316
1317
  ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");
317
1317
  ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1");
318
319
878
  target->Set(context,
320
              FIXED_ONE_BYTE_STRING(isolate, "encodings"),
321
1317
              encodings).Check();
322
323
878
  target->Set(context,
324
              FIXED_ONE_BYTE_STRING(isolate, "kSize"),
325
1756
              Integer::New(isolate, sizeof(StringDecoder))).Check();
326
327
439
  env->SetMethod(target, "decode", DecodeData);
328
439
  env->SetMethod(target, "flush", FlushData);
329
439
}
330
331
}  // anonymous namespace
332
333
4591
void RegisterStringDecoderExternalReferences(
334
    ExternalReferenceRegistry* registry) {
335
4591
  registry->Register(DecodeData);
336
4591
  registry->Register(FlushData);
337
4591
}
338
339
}  // namespace node
340
341
4657
NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder,
342
                                   node::InitializeStringDecoder)
343

18586
NODE_MODULE_EXTERNAL_REFERENCE(string_decoder,
344
                               node::RegisterStringDecoderExternalReferences)