GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/string_decoder.cc Lines: 140 146 95.9 %
Date: 2020-07-19 22:14:24 Branches: 95 118 80.5 %

Line Branch Exec Source
1
#include "string_decoder.h"  // NOLINT(build/include_inline)
2
#include "string_decoder-inl.h"
3
4
#include "env-inl.h"
5
#include "node_buffer.h"
6
#include "node_external_reference.h"
7
#include "string_bytes.h"
8
#include "util.h"
9
10
using v8::Array;
11
using v8::ArrayBufferView;
12
using v8::Context;
13
using v8::FunctionCallbackInfo;
14
using v8::Integer;
15
using v8::Isolate;
16
using v8::Local;
17
using v8::MaybeLocal;
18
using v8::Object;
19
using v8::String;
20
using v8::Value;
21
22
namespace node {
23
24
namespace {
25
26
24252
MaybeLocal<String> MakeString(Isolate* isolate,
27
                              const char* data,
28
                              size_t length,
29
                              enum encoding encoding) {
30
  Local<Value> error;
31
  MaybeLocal<Value> ret;
32
24252
  if (encoding == UTF8) {
33
    return String::NewFromUtf8(
34
        isolate,
35
        data,
36
        v8::NewStringType::kNormal,
37
14603
        length);
38
  } else {
39
    ret = StringBytes::Encode(
40
        isolate,
41
        data,
42
        length,
43
        encoding,
44
9649
        &error);
45
  }
46
47
9649
  if (ret.IsEmpty()) {
48
    CHECK(!error.IsEmpty());
49
    isolate->ThrowException(error);
50
  }
51
52
  DCHECK(ret.IsEmpty() || ret.ToLocalChecked()->IsString());
53
19298
  return ret.FromMaybe(Local<Value>()).As<String>();
54
}
55
56
}  // anonymous namespace
57
58
59
25181
MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
60
                                             const char* data,
61
                                             size_t* nread_ptr) {
62
  Local<String> prepend, body;
63
64
25181
  size_t nread = *nread_ptr;
65
66


25181
  if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
67
    // See if we want bytes to finish a character from the previous
68
    // chunk; if so, copy the new bytes to the missing bytes buffer
69
    // and create a small string from it that is to be prepended to the
70
    // main body.
71
19394
    if (MissingBytes() > 0) {
72
      // There are never more bytes missing than the pre-calculated maximum.
73
3489
      CHECK_LE(MissingBytes() + BufferedBytes(),
74
               kIncompleteCharactersEnd);
75
3489
      if (Encoding() == UTF8) {
76
        // For UTF-8, we need special treatment to align with the V8 decoder:
77
        // If an incomplete character is found at a chunk boundary, we use
78
        // its remainder and pass it to V8 as-is.
79

3278
        for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
80
1835
          if ((data[i] & 0xC0) != 0x80) {
81
            // This byte is not a continuation byte even though it should have
82
            // been one. We stop decoding of the incomplete character at this
83
            // point (but still use the rest of the incomplete bytes from this
84
            // chunk) and assume that the new, unexpected byte starts a new one.
85
98
            state_[kMissingBytes] = 0;
86
98
            memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
87
98
            state_[kBufferedBytes] += i;
88
98
            data += i;
89
98
            nread -= i;
90
98
            break;
91
          }
92
        }
93
      }
94
95
      size_t found_bytes =
96
3489
          std::min(nread, static_cast<size_t>(MissingBytes()));
97
3489
      memcpy(IncompleteCharacterBuffer() + BufferedBytes(),
98
             data,
99
3489
             found_bytes);
100
      // Adjust the two buffers.
101
3489
      data += found_bytes;
102
3489
      nread -= found_bytes;
103
104
3489
      state_[kMissingBytes] -= found_bytes;
105
3489
      state_[kBufferedBytes] += found_bytes;
106
107
3489
      if (LIKELY(MissingBytes() == 0)) {
108
        // If no more bytes are missing, create a small string that we
109
        // will later prepend.
110
6276
        if (!MakeString(isolate,
111
3138
                        IncompleteCharacterBuffer(),
112
3138
                        BufferedBytes(),
113
9414
                        Encoding()).ToLocal(&prepend)) {
114
          return MaybeLocal<String>();
115
        }
116
117
3138
        *nread_ptr += BufferedBytes();
118
        // No more buffered bytes.
119
3138
        state_[kBufferedBytes] = 0;
120
      }
121
    }
122
123
    // It could be that trying to finish the previous chunk already
124
    // consumed all data that we received in this chunk.
125
19394
    if (UNLIKELY(nread == 0)) {
126
2516
      body = !prepend.IsEmpty() ? prepend : String::Empty(isolate);
127
2133
      prepend = Local<String>();
128
    } else {
129
      // If not, that means is no character left to finish at this point.
130
      DCHECK_EQ(MissingBytes(), 0);
131
      DCHECK_EQ(BufferedBytes(), 0);
132
133
      // See whether there is a character that we may have to cut off and
134
      // finish when receiving the next chunk.
135

17261
      if (Encoding() == UTF8 && data[nread - 1] & 0x80) {
136
        // This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte.
137
        // This means we'll need to figure out where the character to which
138
        // the byte belongs begins.
139
2871
        for (size_t i = nread - 1; ; --i) {
140
          DCHECK_LT(i, nread);
141
3913
          state_[kBufferedBytes]++;
142
2871
          if ((data[i] & 0xC0) == 0x80) {
143
            // This byte does not start a character (a "trailing" byte).
144

1092
            if (state_[kBufferedBytes] >= 4 || i == 0) {
145
              // We either have more then 4 trailing bytes (which means
146
              // the current character would not be inside the range for
147
              // valid Unicode, and in particular cannot be represented
148
              // through JavaScript's UTF-16-based approach to strings), or the
149
              // current buffer does not contain the start of an UTF-8 character
150
              // at all. Either way, this is invalid UTF8 and we can just
151
              // let the engine's decoder handle it.
152
50
              state_[kBufferedBytes] = 0;
153
50
              break;
154
            }
155
          } else {
156
            // Found the first byte of a UTF-8 character. By looking at the
157
            // upper bits we can tell how long the character *should* be.
158
1779
            if ((data[i] & 0xE0) == 0xC0) {
159
522
              state_[kMissingBytes] = 2;
160
1257
            } else if ((data[i] & 0xF0) == 0xE0) {
161
1118
              state_[kMissingBytes] = 3;
162
139
            } else if ((data[i] & 0xF8) == 0xF0) {
163
35
              state_[kMissingBytes] = 4;
164
            } else {
165
              // This lead byte would indicate a character outside of the
166
              // representable range.
167
104
              state_[kBufferedBytes] = 0;
168
104
              break;
169
            }
170
171
1675
            if (BufferedBytes() >= MissingBytes()) {
172
              // Received more or exactly as many trailing bytes than the lead
173
              // character would indicate. In the "==" case, we have valid
174
              // data and don't need to slice anything off;
175
              // in the ">" case, this is invalid UTF-8 anyway.
176
389
              state_[kMissingBytes] = 0;
177
389
              state_[kBufferedBytes] = 0;
178
            }
179
180
1675
            state_[kMissingBytes] -= state_[kBufferedBytes];
181
1675
            break;
182
          }
183
        }
184
15432
      } else if (Encoding() == UCS2) {
185
2600
        if ((nread % 2) == 1) {
186
          // We got half a codepoint, and need the second byte of it.
187
1646
          state_[kBufferedBytes] = 1;
188
1646
          state_[kMissingBytes] = 1;
189
954
        } else if ((data[nread - 1] & 0xFC) == 0xD8) {
190
          // Half a split UTF-16 character.
191
9
          state_[kBufferedBytes] = 2;
192
9
          state_[kMissingBytes] = 2;
193
        }
194
12832
      } else if (Encoding() == BASE64) {
195
574
        state_[kBufferedBytes] = nread % 3;
196
574
        if (state_[kBufferedBytes] > 0)
197
403
          state_[kMissingBytes] = 3 - BufferedBytes();
198
      }
199
200
17261
      if (BufferedBytes() > 0) {
201
        // Copy the requested number of buffered bytes from the end of the
202
        // input into the incomplete character buffer.
203
3344
        nread -= BufferedBytes();
204
3344
        *nread_ptr -= BufferedBytes();
205
3344
        memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes());
206
      }
207
208
17261
      if (nread > 0) {
209
30378
        if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body))
210
          return MaybeLocal<String>();
211
      } else {
212
2072
        body = String::Empty(isolate);
213
      }
214
    }
215
216
19394
    if (prepend.IsEmpty()) {
217
18006
      return body;
218
    } else {
219
2776
      return String::Concat(isolate, prepend, body);
220
    }
221
  } else {
222


5787
    CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1);
223
5787
    return MakeString(isolate, data, nread, Encoding());
224
  }
225
}
226
227
206
MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) {
228


206
  if (Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1) {
229
    CHECK_EQ(MissingBytes(), 0);
230
    CHECK_EQ(BufferedBytes(), 0);
231
  }
232
233

206
  if (Encoding() == UCS2 && BufferedBytes() % 2 == 1) {
234
    // Ignore a single trailing byte, like the JS decoder does.
235
69
    state_[kMissingBytes]--;
236
69
    state_[kBufferedBytes]--;
237
  }
238
239
206
  if (BufferedBytes() == 0)
240
68
    return String::Empty(isolate);
241
242
  MaybeLocal<String> ret =
243
      MakeString(isolate,
244
138
                 IncompleteCharacterBuffer(),
245
138
                 BufferedBytes(),
246
414
                 Encoding());
247
248
138
  state_[kMissingBytes] = 0;
249
138
  state_[kBufferedBytes] = 0;
250
251
138
  return ret;
252
}
253
254
namespace {
255
256
25181
void DecodeData(const FunctionCallbackInfo<Value>& args) {
257
  StringDecoder* decoder =
258
25181
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
259
25181
  CHECK_NOT_NULL(decoder);
260
261
50362
  CHECK(args[1]->IsArrayBufferView());
262
50362
  ArrayBufferViewContents<char> content(args[1].As<ArrayBufferView>());
263
25181
  size_t length = content.length();
264
265
  MaybeLocal<String> ret =
266
50362
      decoder->DecodeData(args.GetIsolate(), content.data(), &length);
267
25181
  if (!ret.IsEmpty())
268
50362
    args.GetReturnValue().Set(ret.ToLocalChecked());
269
25181
}
270
271
206
void FlushData(const FunctionCallbackInfo<Value>& args) {
272
  StringDecoder* decoder =
273
206
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
274
206
  CHECK_NOT_NULL(decoder);
275
206
  MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate());
276
206
  if (!ret.IsEmpty())
277
412
    args.GetReturnValue().Set(ret.ToLocalChecked());
278
206
}
279
280
384
void InitializeStringDecoder(Local<Object> target,
281
                             Local<Value> unused,
282
                             Local<Context> context,
283
                             void* priv) {
284
384
  Environment* env = Environment::GetCurrent(context);
285
384
  Isolate* isolate = env->isolate();
286
287
#define SET_DECODER_CONSTANT(name)                                            \
288
  target->Set(context,                                                        \
289
              FIXED_ONE_BYTE_STRING(isolate, #name),                          \
290
              Integer::New(isolate, StringDecoder::name)).FromJust()
291
292
1536
  SET_DECODER_CONSTANT(kIncompleteCharactersStart);
293
1536
  SET_DECODER_CONSTANT(kIncompleteCharactersEnd);
294
1536
  SET_DECODER_CONSTANT(kMissingBytes);
295
1536
  SET_DECODER_CONSTANT(kBufferedBytes);
296
1536
  SET_DECODER_CONSTANT(kEncodingField);
297
1536
  SET_DECODER_CONSTANT(kNumFields);
298
299
384
  Local<Array> encodings = Array::New(isolate);
300
#define ADD_TO_ENCODINGS_ARRAY(cname, jsname)                                 \
301
  encodings->Set(context,                                                     \
302
                 static_cast<int32_t>(cname),                                 \
303
                 FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust()
304
1152
  ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
305
1152
  ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
306
1152
  ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
307
1152
  ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
308
1152
  ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
309
1152
  ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");
310
1152
  ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1");
311
312
768
  target->Set(context,
313
              FIXED_ONE_BYTE_STRING(isolate, "encodings"),
314
1152
              encodings).Check();
315
316
768
  target->Set(context,
317
              FIXED_ONE_BYTE_STRING(isolate, "kSize"),
318
1536
              Integer::New(isolate, sizeof(StringDecoder))).Check();
319
320
384
  env->SetMethod(target, "decode", DecodeData);
321
384
  env->SetMethod(target, "flush", FlushData);
322
384
}
323
324
}  // anonymous namespace
325
326
4920
void RegisterStringDecoderExternalReferences(
327
    ExternalReferenceRegistry* registry) {
328
4920
  registry->Register(DecodeData);
329
4920
  registry->Register(FlushData);
330
4920
}
331
332
}  // namespace node
333
334
4989
NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder,
335
                                   node::InitializeStringDecoder)
336

19890
NODE_MODULE_EXTERNAL_REFERENCE(string_decoder,
337
                               node::RegisterStringDecoderExternalReferences)