GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/string_decoder.cc Lines: 134 140 95.7 %
Date: 2019-09-25 22:36:03 Branches: 91 112 81.3 %

Line Branch Exec Source
1
#include "string_decoder.h"  // NOLINT(build/include_inline)
2
#include "string_decoder-inl.h"
3
4
#include "env-inl.h"
5
#include "node_buffer.h"
6
#include "string_bytes.h"
7
#include "util.h"
8
9
using v8::Array;
10
using v8::ArrayBufferView;
11
using v8::Context;
12
using v8::FunctionCallbackInfo;
13
using v8::Integer;
14
using v8::Isolate;
15
using v8::Local;
16
using v8::MaybeLocal;
17
using v8::Object;
18
using v8::String;
19
using v8::Value;
20
21
namespace node {
22
23
namespace {
24
25
49798
MaybeLocal<String> MakeString(Isolate* isolate,
26
                              const char* data,
27
                              size_t length,
28
                              enum encoding encoding) {
29
  Local<Value> error;
30
  MaybeLocal<Value> ret;
31
49798
  if (encoding == UTF8) {
32
    return String::NewFromUtf8(
33
        isolate,
34
        data,
35
        v8::NewStringType::kNormal,
36
38835
        length);
37
  } else {
38
    ret = StringBytes::Encode(
39
        isolate,
40
        data,
41
        length,
42
        encoding,
43
10963
        &error);
44
  }
45
46
10963
  if (ret.IsEmpty()) {
47
    CHECK(!error.IsEmpty());
48
    isolate->ThrowException(error);
49
  }
50
51
  DCHECK(ret.IsEmpty() || ret.ToLocalChecked()->IsString());
52
21926
  return ret.FromMaybe(Local<Value>()).As<String>();
53
}
54
55
}  // anonymous namespace
56
57
58
50997
MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
59
                                             const char* data,
60
                                             size_t* nread_ptr) {
61
  Local<String> prepend, body;
62
63
50997
  size_t nread = *nread_ptr;
64
65


50997
  if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
66
    // See if we want bytes to finish a character from the previous
67
    // chunk; if so, copy the new bytes to the missing bytes buffer
68
    // and create a small string from it that is to be prepended to the
69
    // main body.
70
44063
    if (MissingBytes() > 0) {
71
      // There are never more bytes missing than the pre-calculated maximum.
72
3547
      CHECK_LE(MissingBytes() + BufferedBytes(),
73
               kIncompleteCharactersEnd);
74
3547
      if (Encoding() == UTF8) {
75
        // For UTF-8, we need special treatment to align with the V8 decoder:
76
        // If an incomplete character is found at a chunk boundary, we use
77
        // its remainder and pass it to V8 as-is.
78

3318
        for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
79
1869
          if ((data[i] & 0xC0) != 0x80) {
80
            // This byte is not a continuation byte even though it should have
81
            // been one. We stop decoding of the incomplete character at this
82
            // point (but still use the rest of the incomplete bytes from this
83
            // chunk) and assume that the new, unexpected byte starts a new one.
84
123
            state_[kMissingBytes] = 0;
85
123
            memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
86
123
            state_[kBufferedBytes] += i;
87
123
            data += i;
88
123
            nread -= i;
89
123
            break;
90
          }
91
        }
92
      }
93
94
      size_t found_bytes =
95
3547
          std::min(nread, static_cast<size_t>(MissingBytes()));
96
7094
      memcpy(IncompleteCharacterBuffer() + BufferedBytes(),
97
             data,
98
7094
             found_bytes);
99
      // Adjust the two buffers.
100
3547
      data += found_bytes;
101
3547
      nread -= found_bytes;
102
103
3547
      state_[kMissingBytes] -= found_bytes;
104
3547
      state_[kBufferedBytes] += found_bytes;
105
106
3547
      if (LIKELY(MissingBytes() == 0)) {
107
        // If no more bytes are missing, create a small string that we
108
        // will later prepend.
109
6382
        if (!MakeString(isolate,
110
3191
                        IncompleteCharacterBuffer(),
111
3191
                        BufferedBytes(),
112
15955
                        Encoding()).ToLocal(&prepend)) {
113
          return MaybeLocal<String>();
114
        }
115
116
3191
        *nread_ptr += BufferedBytes();
117
        // No more buffered bytes.
118
3191
        state_[kBufferedBytes] = 0;
119
      }
120
    }
121
122
    // It could be that trying to finish the previous chunk already
123
    // consumed all data that we received in this chunk.
124
44063
    if (UNLIKELY(nread == 0)) {
125
3190
      body = !prepend.IsEmpty() ? prepend : String::Empty(isolate);
126
2471
      prepend = Local<String>();
127
    } else {
128
      // If not, that means is no character left to finish at this point.
129
      DCHECK_EQ(MissingBytes(), 0);
130
      DCHECK_EQ(BufferedBytes(), 0);
131
132
      // See whether there is a character that we may have to cut off and
133
      // finish when receiving the next chunk.
134

41592
      if (Encoding() == UTF8 && data[nread - 1] & 0x80) {
135
        // This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte.
136
        // This means we'll need to figure out where the character to which
137
        // the byte belongs begins.
138
2895
        for (size_t i = nread - 1; ; --i) {
139
          DCHECK_LT(i, nread);
140
2895
          state_[kBufferedBytes]++;
141
2895
          if ((data[i] & 0xC0) == 0x80) {
142
            // This byte does not start a character (a "trailing" byte).
143

1094
            if (state_[kBufferedBytes] >= 4 || i == 0) {
144
              // We either have more then 4 trailing bytes (which means
145
              // the current character would not be inside the range for
146
              // valid Unicode, and in particular cannot be represented
147
              // through JavaScript's UTF-16-based approach to strings), or the
148
              // current buffer does not contain the start of an UTF-8 character
149
              // at all. Either way, this is invalid UTF8 and we can just
150
              // let the engine's decoder handle it.
151
50
              state_[kBufferedBytes] = 0;
152
50
              break;
153
            }
154
          } else {
155
            // Found the first byte of a UTF-8 character. By looking at the
156
            // upper bits we can tell how long the character *should* be.
157
1801
            if ((data[i] & 0xE0) == 0xC0) {
158
530
              state_[kMissingBytes] = 2;
159
1271
            } else if ((data[i] & 0xF0) == 0xE0) {
160
1128
              state_[kMissingBytes] = 3;
161
143
            } else if ((data[i] & 0xF8) == 0xF0) {
162
46
              state_[kMissingBytes] = 4;
163
            } else {
164
              // This lead byte would indicate a character outside of the
165
              // representable range.
166
97
              state_[kBufferedBytes] = 0;
167
97
              break;
168
            }
169
170
1704
            if (BufferedBytes() >= MissingBytes()) {
171
              // Received more or exactly as many trailing bytes than the lead
172
              // character would indicate. In the "==" case, we have valid
173
              // data and don't need to slice anything off;
174
              // in the ">" case, this is invalid UTF-8 anyway.
175
384
              state_[kMissingBytes] = 0;
176
384
              state_[kBufferedBytes] = 0;
177
            }
178
179
1704
            state_[kMissingBytes] -= state_[kBufferedBytes];
180
1704
            break;
181
          }
182
1044
        }
183
39741
      } else if (Encoding() == UCS2) {
184
2735
        if ((nread % 2) == 1) {
185
          // We got half a codepoint, and need the second byte of it.
186
1703
          state_[kBufferedBytes] = 1;
187
1703
          state_[kMissingBytes] = 1;
188
1032
        } else if ((data[nread - 1] & 0xFC) == 0xD8) {
189
          // Half a split UTF-16 character.
190
11
          state_[kBufferedBytes] = 2;
191
11
          state_[kMissingBytes] = 2;
192
        }
193
37006
      } else if (Encoding() == BASE64) {
194
574
        state_[kBufferedBytes] = nread % 3;
195
574
        if (state_[kBufferedBytes] > 0)
196
398
          state_[kMissingBytes] = 3 - BufferedBytes();
197
      }
198
199
41592
      if (BufferedBytes() > 0) {
200
        // Copy the requested number of buffered bytes from the end of the
201
        // input into the incomplete character buffer.
202
3432
        nread -= BufferedBytes();
203
3432
        *nread_ptr -= BufferedBytes();
204
3432
        memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes());
205
      }
206
207
41592
      if (nread > 0) {
208
79038
        if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body))
209
          return MaybeLocal<String>();
210
      } else {
211
2073
        body = String::Empty(isolate);
212
      }
213
    }
214
215
44063
    if (prepend.IsEmpty()) {
216
42624
      return body;
217
    } else {
218
2878
      return String::Concat(isolate, prepend, body);
219
    }
220
  } else {
221


6934
    CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1);
222
6934
    return MakeString(isolate, data, nread, Encoding());
223
  }
224
}
225
226
241
MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) {
227


241
  if (Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1) {
228
    CHECK_EQ(MissingBytes(), 0);
229
    CHECK_EQ(BufferedBytes(), 0);
230
  }
231
232

241
  if (Encoding() == UCS2 && BufferedBytes() % 2 == 1) {
233
    // Ignore a single trailing byte, like the JS decoder does.
234
88
    state_[kMissingBytes]--;
235
88
    state_[kBufferedBytes]--;
236
  }
237
238
241
  if (BufferedBytes() == 0)
239
87
    return String::Empty(isolate);
240
241
  MaybeLocal<String> ret =
242
      MakeString(isolate,
243
154
                 IncompleteCharacterBuffer(),
244
154
                 BufferedBytes(),
245
462
                 Encoding());
246
247
154
  state_[kMissingBytes] = 0;
248
154
  state_[kBufferedBytes] = 0;
249
250
154
  return ret;
251
}
252
253
namespace {
254
255
50997
void DecodeData(const FunctionCallbackInfo<Value>& args) {
256
  StringDecoder* decoder =
257
50997
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
258
50997
  CHECK_NOT_NULL(decoder);
259
260
101994
  CHECK(args[1]->IsArrayBufferView());
261
101994
  ArrayBufferViewContents<char> content(args[1].As<ArrayBufferView>());
262
50997
  size_t length = content.length();
263
264
  MaybeLocal<String> ret =
265
101994
      decoder->DecodeData(args.GetIsolate(), content.data(), &length);
266
50997
  if (!ret.IsEmpty())
267
101994
    args.GetReturnValue().Set(ret.ToLocalChecked());
268
50997
}
269
270
241
void FlushData(const FunctionCallbackInfo<Value>& args) {
271
  StringDecoder* decoder =
272
241
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
273
241
  CHECK_NOT_NULL(decoder);
274
241
  MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate());
275
241
  if (!ret.IsEmpty())
276
482
    args.GetReturnValue().Set(ret.ToLocalChecked());
277
241
}
278
279
5156
void InitializeStringDecoder(Local<Object> target,
280
                             Local<Value> unused,
281
                             Local<Context> context,
282
                             void* priv) {
283
5156
  Environment* env = Environment::GetCurrent(context);
284
5156
  Isolate* isolate = env->isolate();
285
286
#define SET_DECODER_CONSTANT(name)                                            \
287
  target->Set(context,                                                        \
288
              FIXED_ONE_BYTE_STRING(isolate, #name),                          \
289
              Integer::New(isolate, StringDecoder::name)).FromJust()
290
291
20624
  SET_DECODER_CONSTANT(kIncompleteCharactersStart);
292
20624
  SET_DECODER_CONSTANT(kIncompleteCharactersEnd);
293
20624
  SET_DECODER_CONSTANT(kMissingBytes);
294
20624
  SET_DECODER_CONSTANT(kBufferedBytes);
295
20624
  SET_DECODER_CONSTANT(kEncodingField);
296
20624
  SET_DECODER_CONSTANT(kNumFields);
297
298
5156
  Local<Array> encodings = Array::New(isolate);
299
#define ADD_TO_ENCODINGS_ARRAY(cname, jsname)                                 \
300
  encodings->Set(context,                                                     \
301
                 static_cast<int32_t>(cname),                                 \
302
                 FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust()
303
15468
  ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
304
15468
  ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
305
15468
  ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
306
15468
  ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
307
15468
  ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
308
15468
  ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");
309
15468
  ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1");
310
311
  target->Set(context,
312
              FIXED_ONE_BYTE_STRING(isolate, "encodings"),
313
15468
              encodings).Check();
314
315
  target->Set(context,
316
              FIXED_ONE_BYTE_STRING(isolate, "kSize"),
317
20624
              Integer::New(isolate, sizeof(StringDecoder))).Check();
318
319
5156
  env->SetMethod(target, "decode", DecodeData);
320
5156
  env->SetMethod(target, "flush", FlushData);
321
5156
}
322
323
}  // anonymous namespace
324
325
}  // namespace node
326
327
5010
NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder,
328
                                   node::InitializeStringDecoder)