GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/string_decoder.cc Lines: 134 140 95.7 %
Date: 2019-05-05 22:32:45 Branches: 91 112 81.3 %

Line Branch Exec Source
1
#include "string_decoder.h"  // NOLINT(build/include_inline)
2
#include "string_decoder-inl.h"
3
4
#include "env-inl.h"
5
#include "node_buffer.h"
6
#include "string_bytes.h"
7
8
using v8::Array;
9
using v8::ArrayBufferView;
10
using v8::Context;
11
using v8::FunctionCallbackInfo;
12
using v8::Integer;
13
using v8::Isolate;
14
using v8::Local;
15
using v8::MaybeLocal;
16
using v8::Object;
17
using v8::String;
18
using v8::Value;
19
20
namespace node {
21
22
namespace {
23
24
96881
MaybeLocal<String> MakeString(Isolate* isolate,
25
                              const char* data,
26
                              size_t length,
27
                              enum encoding encoding) {
28
  Local<Value> error;
29
  MaybeLocal<Value> ret;
30
96881
  if (encoding == UTF8) {
31
    return String::NewFromUtf8(
32
        isolate,
33
        data,
34
        v8::NewStringType::kNormal,
35
86403
        length);
36
  } else {
37
    ret = StringBytes::Encode(
38
        isolate,
39
        data,
40
        length,
41
        encoding,
42
10478
        &error);
43
  }
44
45
10478
  if (ret.IsEmpty()) {
46
    CHECK(!error.IsEmpty());
47
    isolate->ThrowException(error);
48
  }
49
50
  DCHECK(ret.IsEmpty() || ret.ToLocalChecked()->IsString());
51
20956
  return ret.FromMaybe(Local<Value>()).As<String>();
52
}
53
54
}  // anonymous namespace
55
56
57
97912
MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
58
                                             const char* data,
59
                                             size_t* nread_ptr) {
60
  Local<String> prepend, body;
61
62
97912
  size_t nread = *nread_ptr;
63
64


97912
  if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
65
    // See if we want bytes to finish a character from the previous
66
    // chunk; if so, copy the new bytes to the missing bytes buffer
67
    // and create a small string from it that is to be prepended to the
68
    // main body.
69
91325
    if (MissingBytes() > 0) {
70
      // There are never more bytes missing than the pre-calculated maximum.
71
3460
      CHECK_LE(MissingBytes() + BufferedBytes(),
72
               kIncompleteCharactersEnd);
73
3460
      if (Encoding() == UTF8) {
74
        // For UTF-8, we need special treatment to align with the V8 decoder:
75
        // If an incomplete character is found at a chunk boundary, we use
76
        // its remainder and pass it to V8 as-is.
77

3269
        for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
78
1827
          if ((data[i] & 0xC0) != 0x80) {
79
            // This byte is not a continuation byte even though it should have
80
            // been one. We stop decoding of the incomplete character at this
81
            // point (but still use the rest of the incomplete bytes from this
82
            // chunk) and assume that the new, unexpected byte starts a new one.
83
91
            state_[kMissingBytes] = 0;
84
91
            memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
85
91
            state_[kBufferedBytes] += i;
86
91
            data += i;
87
91
            nread -= i;
88
91
            break;
89
          }
90
        }
91
      }
92
93
      size_t found_bytes =
94
3460
          std::min(nread, static_cast<size_t>(MissingBytes()));
95
6920
      memcpy(IncompleteCharacterBuffer() + BufferedBytes(),
96
             data,
97
6920
             found_bytes);
98
      // Adjust the two buffers.
99
3460
      data += found_bytes;
100
3460
      nread -= found_bytes;
101
102
3460
      state_[kMissingBytes] -= found_bytes;
103
3460
      state_[kBufferedBytes] += found_bytes;
104
105
3460
      if (LIKELY(MissingBytes() == 0)) {
106
        // If no more bytes are missing, create a small string that we
107
        // will later prepend.
108
6208
        if (!MakeString(isolate,
109
3104
                        IncompleteCharacterBuffer(),
110
3104
                        BufferedBytes(),
111
15520
                        Encoding()).ToLocal(&prepend)) {
112
          return MaybeLocal<String>();
113
        }
114
115
3104
        *nread_ptr += BufferedBytes();
116
        // No more buffered bytes.
117
3104
        state_[kBufferedBytes] = 0;
118
      }
119
    }
120
121
    // It could be that trying to finish the previous chunk already
122
    // consumed all data that we received in this chunk.
123
91325
    if (UNLIKELY(nread == 0)) {
124
2681
      body = !prepend.IsEmpty() ? prepend : String::Empty(isolate);
125
2214
      prepend = Local<String>();
126
    } else {
127
      // If not, that means there is no character left to finish at this point.
128
      DCHECK_EQ(MissingBytes(), 0);
129
      DCHECK_EQ(BufferedBytes(), 0);
130
131
      // See whether there is a character that we may have to cut off and
132
      // finish when receiving the next chunk.
133

89111
      if (Encoding() == UTF8 && data[nread - 1] & 0x80) {
134
        // This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte.
135
        // This means we'll need to figure out where the character to which
136
        // the byte belongs begins.
137
2787
        for (size_t i = nread - 1; ; --i) {
138
          DCHECK_LT(i, nread);
139
2787
          state_[kBufferedBytes]++;
140
2787
          if ((data[i] & 0xC0) == 0x80) {
141
            // This byte does not start a character (a "trailing" byte).
142

1051
            if (state_[kBufferedBytes] >= 4 || i == 0) {
143
              // We either have at least 4 trailing bytes (which means
144
              // the current character would not be inside the range for
145
              // valid Unicode, and in particular cannot be represented
146
              // through JavaScript's UTF-16-based approach to strings), or the
147
              // current buffer does not contain the start of a UTF-8 character
148
              // at all. Either way, this is invalid UTF8 and we can just
149
              // let the engine's decoder handle it.
150
47
              state_[kBufferedBytes] = 0;
151
47
              break;
152
            }
153
          } else {
154
            // Found the first byte of a UTF-8 character. By looking at the
155
            // upper bits we can tell how long the character *should* be.
156
1736
            if ((data[i] & 0xE0) == 0xC0) {
157
511
              state_[kMissingBytes] = 2;
158
1225
            } else if ((data[i] & 0xF0) == 0xE0) {
159
1105
              state_[kMissingBytes] = 3;
160
120
            } else if ((data[i] & 0xF8) == 0xF0) {
161
40
              state_[kMissingBytes] = 4;
162
            } else {
163
              // This lead byte would indicate a character outside of the
164
              // representable range.
165
80
              state_[kBufferedBytes] = 0;
166
80
              break;
167
            }
168
169
1656
            if (BufferedBytes() >= MissingBytes()) {
170
              // Received at least as many trailing bytes as the lead
171
              // character would indicate. In the "==" case, we have valid
172
              // data and don't need to slice anything off;
173
              // in the ">" case, this is invalid UTF-8 anyway.
174
384
              state_[kMissingBytes] = 0;
175
384
              state_[kBufferedBytes] = 0;
176
            }
177
178
1656
            state_[kMissingBytes] -= state_[kBufferedBytes];
179
1656
            break;
180
          }
181
1004
        }
182
87328
      } else if (Encoding() == UCS2) {
183
2706
        if ((nread % 2) == 1) {
184
          // We got half a codepoint, and need the second byte of it.
185
1683
          state_[kBufferedBytes] = 1;
186
1683
          state_[kMissingBytes] = 1;
187
1023
        } else if ((data[nread - 1] & 0xFC) == 0xD8) {
188
          // Half a split UTF-16 character.
189
12
          state_[kBufferedBytes] = 2;
190
12
          state_[kMissingBytes] = 2;
191
        }
192
84622
      } else if (Encoding() == BASE64) {
193
506
        state_[kBufferedBytes] = nread % 3;
194
506
        if (state_[kBufferedBytes] > 0)
195
359
          state_[kMissingBytes] = 3 - BufferedBytes();
196
      }
197
198
89111
      if (BufferedBytes() > 0) {
199
        // Copy the requested number of buffered bytes from the end of the
200
        // input into the incomplete character buffer.
201
3326
        nread -= BufferedBytes();
202
3326
        *nread_ptr -= BufferedBytes();
203
3326
        memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes());
204
      }
205
206
89111
      if (nread > 0) {
207
174098
        if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body))
208
          return MaybeLocal<String>();
209
      } else {
210
2062
        body = String::Empty(isolate);
211
      }
212
    }
213
214
91325
    if (prepend.IsEmpty()) {
215
89968
      return body;
216
    } else {
217
2714
      return String::Concat(isolate, prepend, body);
218
    }
219
  } else {
220


6587
    CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1);
221
6587
    return MakeString(isolate, data, nread, Encoding());
222
  }
223
}
224
225
222
MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) {
226


222
  if (Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1) {
227
    CHECK_EQ(MissingBytes(), 0);
228
    CHECK_EQ(BufferedBytes(), 0);
229
  }
230
231

222
  if (Encoding() == UCS2 && BufferedBytes() % 2 == 1) {
232
    // Ignore a single trailing byte, like the JS decoder does.
233
82
    state_[kMissingBytes]--;
234
82
    state_[kBufferedBytes]--;
235
  }
236
237
222
  if (BufferedBytes() == 0)
238
81
    return String::Empty(isolate);
239
240
  MaybeLocal<String> ret =
241
      MakeString(isolate,
242
141
                 IncompleteCharacterBuffer(),
243
141
                 BufferedBytes(),
244
423
                 Encoding());
245
246
141
  state_[kMissingBytes] = 0;
247
141
  state_[kBufferedBytes] = 0;
248
249
141
  return ret;
250
}
251
252
namespace {
253
254
97912
void DecodeData(const FunctionCallbackInfo<Value>& args) {
255
  StringDecoder* decoder =
256
97912
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
257
97912
  CHECK_NOT_NULL(decoder);
258
259
195824
  CHECK(args[1]->IsArrayBufferView());
260
195824
  ArrayBufferViewContents<char> content(args[1].As<ArrayBufferView>());
261
97912
  size_t length = content.length();
262
263
  MaybeLocal<String> ret =
264
195824
      decoder->DecodeData(args.GetIsolate(), content.data(), &length);
265
97912
  if (!ret.IsEmpty())
266
195824
    args.GetReturnValue().Set(ret.ToLocalChecked());
267
97912
}
268
269
222
void FlushData(const FunctionCallbackInfo<Value>& args) {
270
  StringDecoder* decoder =
271
222
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
272
222
  CHECK_NOT_NULL(decoder);
273
222
  MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate());
274
222
  if (!ret.IsEmpty())
275
444
    args.GetReturnValue().Set(ret.ToLocalChecked());
276
222
}
277
278
1509
void InitializeStringDecoder(Local<Object> target,
279
                             Local<Value> unused,
280
                             Local<Context> context,
281
                             void* priv) {
282
1509
  Environment* env = Environment::GetCurrent(context);
283
1509
  Isolate* isolate = env->isolate();
284
285
#define SET_DECODER_CONSTANT(name)                                            \
286
  target->Set(context,                                                        \
287
              FIXED_ONE_BYTE_STRING(isolate, #name),                          \
288
              Integer::New(isolate, StringDecoder::name)).FromJust()
289
290
6036
  SET_DECODER_CONSTANT(kIncompleteCharactersStart);
291
6036
  SET_DECODER_CONSTANT(kIncompleteCharactersEnd);
292
6036
  SET_DECODER_CONSTANT(kMissingBytes);
293
6036
  SET_DECODER_CONSTANT(kBufferedBytes);
294
6036
  SET_DECODER_CONSTANT(kEncodingField);
295
6036
  SET_DECODER_CONSTANT(kNumFields);
296
297
1509
  Local<Array> encodings = Array::New(isolate);
298
#define ADD_TO_ENCODINGS_ARRAY(cname, jsname)                                 \
299
  encodings->Set(context,                                                     \
300
                 static_cast<int32_t>(cname),                                 \
301
                 FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust()
302
4527
  ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
303
4527
  ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
304
4527
  ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
305
4527
  ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
306
4527
  ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
307
4527
  ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");
308
4527
  ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1");
309
310
  target->Set(context,
311
              FIXED_ONE_BYTE_STRING(isolate, "encodings"),
312
4527
              encodings).Check();
313
314
  target->Set(context,
315
              FIXED_ONE_BYTE_STRING(isolate, "kSize"),
316
6036
              Integer::New(isolate, sizeof(StringDecoder))).Check();
317
318
1509
  env->SetMethod(target, "decode", DecodeData);
319
1509
  env->SetMethod(target, "flush", FlushData);
320
1509
}
321
322
}  // anonymous namespace
323
324
}  // namespace node
325
326
4524
NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder,
327
                                   node::InitializeStringDecoder)