GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/string_decoder.cc Lines: 149 154 96.8 %
Date: 2021-04-20 04:11:54 Branches: 105 126 83.3 %

Line Branch Exec Source
1
#include "string_decoder.h"  // NOLINT(build/include_inline)
2
#include "string_decoder-inl.h"
3
4
#include "env-inl.h"
5
#include "node_buffer.h"
6
#include "node_errors.h"
7
#include "node_external_reference.h"
8
#include "string_bytes.h"
9
#include "util.h"
10
11
using v8::Array;
12
using v8::ArrayBufferView;
13
using v8::Context;
14
using v8::FunctionCallbackInfo;
15
using v8::Integer;
16
using v8::Isolate;
17
using v8::Local;
18
using v8::MaybeLocal;
19
using v8::Object;
20
using v8::String;
21
using v8::Value;
22
23
namespace node {
24
25
namespace {
26
27
25504
MaybeLocal<String> MakeString(Isolate* isolate,
28
                              const char* data,
29
                              size_t length,
30
                              enum encoding encoding) {
31
  Local<Value> error;
32
  MaybeLocal<Value> ret;
33
25504
  if (encoding == UTF8) {
34
    MaybeLocal<String> utf8_string = String::NewFromUtf8(
35
        isolate,
36
        data,
37
        v8::NewStringType::kNormal,
38
13504
        length);
39
13504
    if (utf8_string.IsEmpty()) {
40
1
      isolate->ThrowException(node::ERR_STRING_TOO_LONG(isolate));
41
1
      return MaybeLocal<String>();
42
    } else {
43
13503
      return utf8_string;
44
    }
45
  } else {
46
    ret = StringBytes::Encode(
47
        isolate,
48
        data,
49
        length,
50
        encoding,
51
12000
        &error);
52
  }
53
54
12000
  if (ret.IsEmpty()) {
55
    CHECK(!error.IsEmpty());
56
    isolate->ThrowException(error);
57
  }
58
59
  DCHECK(ret.IsEmpty() || ret.ToLocalChecked()->IsString());
60
24000
  return ret.FromMaybe(Local<Value>()).As<String>();
61
}
62
63
}  // anonymous namespace
64
65
66
25899
MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
67
                                             const char* data,
68
                                             size_t* nread_ptr) {
69
  Local<String> prepend, body;
70
71
25899
  size_t nread = *nread_ptr;
72
73

63667
  if (Encoding() == UTF8 ||
74
19964
      Encoding() == UCS2 ||
75

41124
      Encoding() == BASE64 ||
76
7130
      Encoding() == BASE64URL) {
77
    // See if we want bytes to finish a character from the previous
78
    // chunk; if so, copy the new bytes to the missing bytes buffer
79
    // and create a small string from it that is to be prepended to the
80
    // main body.
81
19523
    if (MissingBytes() > 0) {
82
      // There are never more bytes missing than the pre-calculated maximum.
83
4194
      CHECK_LE(MissingBytes() + BufferedBytes(),
84
               kIncompleteCharactersEnd);
85
4194
      if (Encoding() == UTF8) {
86
        // For UTF-8, we need special treatment to align with the V8 decoder:
87
        // If an incomplete character is found at a chunk boundary, we use
88
        // its remainder and pass it to V8 as-is.
89

3351
        for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
90
1900
          if ((data[i] & 0xC0) != 0x80) {
91
            // This byte is not a continuation byte even though it should have
92
            // been one. We stop decoding of the incomplete character at this
93
            // point (but still use the rest of the incomplete bytes from this
94
            // chunk) and assume that the new, unexpected byte starts a new one.
95
146
            state_[kMissingBytes] = 0;
96
146
            memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
97
146
            state_[kBufferedBytes] += i;
98
146
            data += i;
99
146
            nread -= i;
100
146
            break;
101
          }
102
        }
103
      }
104
105
      size_t found_bytes =
106
4194
          std::min(nread, static_cast<size_t>(MissingBytes()));
107
4194
      memcpy(IncompleteCharacterBuffer() + BufferedBytes(),
108
             data,
109
4194
             found_bytes);
110
      // Adjust the two buffers.
111
4194
      data += found_bytes;
112
4194
      nread -= found_bytes;
113
114
4194
      state_[kMissingBytes] -= found_bytes;
115
4194
      state_[kBufferedBytes] += found_bytes;
116
117
4194
      if (LIKELY(MissingBytes() == 0)) {
118
        // If no more bytes are missing, create a small string that we
119
        // will later prepend.
120
7536
        if (!MakeString(isolate,
121
3768
                        IncompleteCharacterBuffer(),
122
3768
                        BufferedBytes(),
123
11304
                        Encoding()).ToLocal(&prepend)) {
124
          return MaybeLocal<String>();
125
        }
126
127
3768
        *nread_ptr += BufferedBytes();
128
        // No more buffered bytes.
129
3768
        state_[kBufferedBytes] = 0;
130
      }
131
    }
132
133
    // It could be that trying to finish the previous chunk already
134
    // consumed all data that we received in this chunk.
135
19523
    if (UNLIKELY(nread == 0)) {
136
2770
      body = !prepend.IsEmpty() ? prepend : String::Empty(isolate);
137
2290
      prepend = Local<String>();
138
    } else {
139
      // If not, that means is no character left to finish at this point.
140
      DCHECK_EQ(MissingBytes(), 0);
141
      DCHECK_EQ(BufferedBytes(), 0);
142
143
      // See whether there is a character that we may have to cut off and
144
      // finish when receiving the next chunk.
145

17233
      if (Encoding() == UTF8 && data[nread - 1] & 0x80) {
146
        // This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte.
147
        // This means we'll need to figure out where the character to which
148
        // the byte belongs begins.
149
2992
        for (size_t i = nread - 1; ; --i) {
150
          DCHECK_LT(i, nread);
151
4070
          state_[kBufferedBytes]++;
152
2992
          if ((data[i] & 0xC0) == 0x80) {
153
            // This byte does not start a character (a "trailing" byte).
154

1125
            if (state_[kBufferedBytes] >= 4 || i == 0) {
155
              // We either have more then 4 trailing bytes (which means
156
              // the current character would not be inside the range for
157
              // valid Unicode, and in particular cannot be represented
158
              // through JavaScript's UTF-16-based approach to strings), or the
159
              // current buffer does not contain the start of an UTF-8 character
160
              // at all. Either way, this is invalid UTF8 and we can just
161
              // let the engine's decoder handle it.
162
47
              state_[kBufferedBytes] = 0;
163
47
              break;
164
            }
165
          } else {
166
            // Found the first byte of a UTF-8 character. By looking at the
167
            // upper bits we can tell how long the character *should* be.
168
1867
            if ((data[i] & 0xE0) == 0xC0) {
169
545
              state_[kMissingBytes] = 2;
170
1322
            } else if ((data[i] & 0xF0) == 0xE0) {
171
1145
              state_[kMissingBytes] = 3;
172
177
            } else if ((data[i] & 0xF8) == 0xF0) {
173
49
              state_[kMissingBytes] = 4;
174
            } else {
175
              // This lead byte would indicate a character outside of the
176
              // representable range.
177
128
              state_[kBufferedBytes] = 0;
178
128
              break;
179
            }
180
181
1739
            if (BufferedBytes() >= MissingBytes()) {
182
              // Received more or exactly as many trailing bytes than the lead
183
              // character would indicate. In the "==" case, we have valid
184
              // data and don't need to slice anything off;
185
              // in the ">" case, this is invalid UTF-8 anyway.
186
391
              state_[kMissingBytes] = 0;
187
391
              state_[kBufferedBytes] = 0;
188
            }
189
190
1739
            state_[kMissingBytes] -= state_[kBufferedBytes];
191
1739
            break;
192
          }
193
        }
194
15319
      } else if (Encoding() == UCS2) {
195
2828
        if ((nread % 2) == 1) {
196
          // We got half a codepoint, and need the second byte of it.
197
1757
          state_[kBufferedBytes] = 1;
198
1757
          state_[kMissingBytes] = 1;
199
1071
        } else if ((data[nread - 1] & 0xFC) == 0xD8) {
200
          // Half a split UTF-16 character.
201
10
          state_[kBufferedBytes] = 2;
202
10
          state_[kMissingBytes] = 2;
203
        }
204

12491
      } else if (Encoding() == BASE64 || Encoding() == BASE64URL) {
205
1480
        state_[kBufferedBytes] = nread % 3;
206
1480
        if (state_[kBufferedBytes] > 0)
207
1061
          state_[kMissingBytes] = 3 - BufferedBytes();
208
      }
209
210
17233
      if (BufferedBytes() > 0) {
211
        // Copy the requested number of buffered bytes from the end of the
212
        // input into the incomplete character buffer.
213
4176
        nread -= BufferedBytes();
214
4176
        *nread_ptr -= BufferedBytes();
215
4176
        memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes());
216
      }
217
218
17233
      if (nread > 0) {
219
30118
        if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body))
220
1
          return MaybeLocal<String>();
221
      } else {
222
2174
        body = String::Empty(isolate);
223
      }
224
    }
225
226
19522
    if (prepend.IsEmpty()) {
227
17564
      return body;
228
    } else {
229
3916
      return String::Concat(isolate, prepend, body);
230
    }
231
  } else {
232


6376
    CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1);
233
6376
    return MakeString(isolate, data, nread, Encoding());
234
  }
235
}
236
237
408
MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) {
238


408
  if (Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1) {
239
    CHECK_EQ(MissingBytes(), 0);
240
    CHECK_EQ(BufferedBytes(), 0);
241
  }
242
243

408
  if (Encoding() == UCS2 && BufferedBytes() % 2 == 1) {
244
    // Ignore a single trailing byte, like the JS decoder does.
245
108
    state_[kMissingBytes]--;
246
108
    state_[kBufferedBytes]--;
247
  }
248
249
408
  if (BufferedBytes() == 0)
250
107
    return String::Empty(isolate);
251
252
  MaybeLocal<String> ret =
253
      MakeString(isolate,
254
301
                 IncompleteCharacterBuffer(),
255
301
                 BufferedBytes(),
256
903
                 Encoding());
257
258
301
  state_[kMissingBytes] = 0;
259
301
  state_[kBufferedBytes] = 0;
260
261
301
  return ret;
262
}
263
264
namespace {
265
266
25899
void DecodeData(const FunctionCallbackInfo<Value>& args) {
267
  StringDecoder* decoder =
268
25899
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
269
25899
  CHECK_NOT_NULL(decoder);
270
271
51798
  CHECK(args[1]->IsArrayBufferView());
272
51798
  ArrayBufferViewContents<char> content(args[1].As<ArrayBufferView>());
273
25899
  size_t length = content.length();
274
275
  MaybeLocal<String> ret =
276
51798
      decoder->DecodeData(args.GetIsolate(), content.data(), &length);
277
25899
  if (!ret.IsEmpty())
278
51796
    args.GetReturnValue().Set(ret.ToLocalChecked());
279
25899
}
280
281
408
void FlushData(const FunctionCallbackInfo<Value>& args) {
282
  StringDecoder* decoder =
283
408
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
284
408
  CHECK_NOT_NULL(decoder);
285
408
  MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate());
286
408
  if (!ret.IsEmpty())
287
816
    args.GetReturnValue().Set(ret.ToLocalChecked());
288
408
}
289
290
462
void InitializeStringDecoder(Local<Object> target,
291
                             Local<Value> unused,
292
                             Local<Context> context,
293
                             void* priv) {
294
462
  Environment* env = Environment::GetCurrent(context);
295
462
  Isolate* isolate = env->isolate();
296
297
#define SET_DECODER_CONSTANT(name)                                            \
298
  target->Set(context,                                                        \
299
              FIXED_ONE_BYTE_STRING(isolate, #name),                          \
300
              Integer::New(isolate, StringDecoder::name)).FromJust()
301
302
1848
  SET_DECODER_CONSTANT(kIncompleteCharactersStart);
303
1848
  SET_DECODER_CONSTANT(kIncompleteCharactersEnd);
304
1848
  SET_DECODER_CONSTANT(kMissingBytes);
305
1848
  SET_DECODER_CONSTANT(kBufferedBytes);
306
1848
  SET_DECODER_CONSTANT(kEncodingField);
307
1848
  SET_DECODER_CONSTANT(kNumFields);
308
309
462
  Local<Array> encodings = Array::New(isolate);
310
#define ADD_TO_ENCODINGS_ARRAY(cname, jsname)                                 \
311
  encodings->Set(context,                                                     \
312
                 static_cast<int32_t>(cname),                                 \
313
                 FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust()
314
1386
  ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
315
1386
  ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
316
1386
  ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
317
1386
  ADD_TO_ENCODINGS_ARRAY(BASE64URL, "base64url");
318
1386
  ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
319
1386
  ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
320
1386
  ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");
321
1386
  ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1");
322
323
924
  target->Set(context,
324
              FIXED_ONE_BYTE_STRING(isolate, "encodings"),
325
1386
              encodings).Check();
326
327
924
  target->Set(context,
328
              FIXED_ONE_BYTE_STRING(isolate, "kSize"),
329
1848
              Integer::New(isolate, sizeof(StringDecoder))).Check();
330
331
462
  env->SetMethod(target, "decode", DecodeData);
332
462
  env->SetMethod(target, "flush", FlushData);
333
462
}
334
335
}  // anonymous namespace
336
337
4703
void RegisterStringDecoderExternalReferences(
338
    ExternalReferenceRegistry* registry) {
339
4703
  registry->Register(DecodeData);
340
4703
  registry->Register(FlushData);
341
4703
}
342
343
}  // namespace node
344
345
4762
NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder,
346
                                   node::InitializeStringDecoder)
347

19013
NODE_MODULE_EXTERNAL_REFERENCE(string_decoder,
348
                               node::RegisterStringDecoderExternalReferences)