GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/string_decoder.cc Lines: 134 140 95.7 %
Date: 2019-03-02 22:23:06 Branches: 91 112 81.3 %

Line Branch Exec Source
1
#include "env-inl.h"
2
#include "node_buffer.h"
3
#include "string_bytes.h"
4
#include "string_decoder-inl.h"
5
6
using v8::Array;
7
using v8::ArrayBufferView;
8
using v8::Context;
9
using v8::FunctionCallbackInfo;
10
using v8::Integer;
11
using v8::Isolate;
12
using v8::Local;
13
using v8::MaybeLocal;
14
using v8::Object;
15
using v8::String;
16
using v8::Value;
17
18
namespace node {
19
20
namespace {
21
22
30418
MaybeLocal<String> MakeString(Isolate* isolate,
23
                              const char* data,
24
                              size_t length,
25
                              enum encoding encoding) {
26
  Local<Value> error;
27
  MaybeLocal<Value> ret;
28
30418
  if (encoding == UTF8) {
29
    return String::NewFromUtf8(
30
        isolate,
31
        data,
32
        v8::NewStringType::kNormal,
33
20390
        length);
34
  } else {
35
    ret = StringBytes::Encode(
36
        isolate,
37
        data,
38
        length,
39
        encoding,
40
10028
        &error);
41
  }
42
43
10028
  if (ret.IsEmpty()) {
44
    CHECK(!error.IsEmpty());
45
    isolate->ThrowException(error);
46
  }
47
48
  DCHECK(ret.IsEmpty() || ret.ToLocalChecked()->IsString());
49
20056
  return ret.FromMaybe(Local<Value>()).As<String>();
50
}
51
52
}  // anonymous namespace
53
54
55
31471
MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
56
                                             const char* data,
57
                                             size_t* nread_ptr) {
58
  Local<String> prepend, body;
59
60
31471
  size_t nread = *nread_ptr;
61
62


31471
  if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
63
    // See if we want bytes to finish a character from the previous
64
    // chunk; if so, copy the new bytes to the missing bytes buffer
65
    // and create a small string from it that is to be prepended to the
66
    // main body.
67
25087
    if (MissingBytes() > 0) {
68
      // There are never more bytes missing than the pre-calculated maximum.
69
3382
      CHECK_LE(MissingBytes() + BufferedBytes(),
70
               kIncompleteCharactersEnd);
71
3382
      if (Encoding() == UTF8) {
72
        // For UTF-8, we need special treatment to align with the V8 decoder:
73
        // If an incomplete character is found at a chunk boundary, we use
74
        // its remainder and pass it to V8 as-is.
75

3241
        for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
76
1808
          if ((data[i] & 0xC0) != 0x80) {
77
            // This byte is not a continuation byte even though it should have
78
            // been one. We stop decoding of the incomplete character at this
79
            // point (but still use the rest of the incomplete bytes from this
80
            // chunk) and assume that the new, unexpected byte starts a new one.
81
79
            state_[kMissingBytes] = 0;
82
79
            memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
83
79
            state_[kBufferedBytes] += i;
84
79
            data += i;
85
79
            nread -= i;
86
79
            break;
87
          }
88
        }
89
      }
90
91
      size_t found_bytes =
92
3382
          std::min(nread, static_cast<size_t>(MissingBytes()));
93
6764
      memcpy(IncompleteCharacterBuffer() + BufferedBytes(),
94
             data,
95
6764
             found_bytes);
96
      // Adjust the two buffers.
97
3382
      data += found_bytes;
98
3382
      nread -= found_bytes;
99
100
3382
      state_[kMissingBytes] -= found_bytes;
101
3382
      state_[kBufferedBytes] += found_bytes;
102
103
3382
      if (LIKELY(MissingBytes() == 0)) {
104
        // If no more bytes are missing, create a small string that we
105
        // will later prepend.
106
6054
        if (!MakeString(isolate,
107
3027
                        IncompleteCharacterBuffer(),
108
3027
                        BufferedBytes(),
109
15135
                        Encoding()).ToLocal(&prepend)) {
110
          return MaybeLocal<String>();
111
        }
112
113
3027
        *nread_ptr += BufferedBytes();
114
        // No more buffered bytes.
115
3027
        state_[kBufferedBytes] = 0;
116
      }
117
    }
118
119
    // It could be that trying to finish the previous chunk already
120
    // consumed all data that we received in this chunk.
121
25087
    if (UNLIKELY(nread == 0)) {
122
2551
      body = !prepend.IsEmpty() ? prepend : String::Empty(isolate);
123
2150
      prepend = Local<String>();
124
    } else {
125
      // If not, that means there is no character left to finish at this point.
126
      DCHECK_EQ(MissingBytes(), 0);
127
      DCHECK_EQ(BufferedBytes(), 0);
128
129
      // See whether there is a character that we may have to cut off and
130
      // finish when receiving the next chunk.
131

22937
      if (Encoding() == UTF8 && data[nread - 1] & 0x80) {
132
        // This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte.
133
        // This means we'll need to figure out where the character to which
134
        // the byte belongs begins.
135
2737
        for (size_t i = nread - 1; ; --i) {
136
          DCHECK_LT(i, nread);
137
2737
          state_[kBufferedBytes]++;
138
2737
          if ((data[i] & 0xC0) == 0x80) {
139
            // This byte does not start a character (a "trailing" byte).
140

1042
            if (state_[kBufferedBytes] >= 4 || i == 0) {
141
              // We either have 4 or more trailing bytes (which means
142
              // the current character would not be inside the range for
143
              // valid Unicode, and in particular cannot be represented
144
              // through JavaScript's UTF-16-based approach to strings), or the
145
              // current buffer does not contain the start of a UTF-8 character
146
              // at all. Either way, this is invalid UTF8 and we can just
147
              // let the engine's decoder handle it.
148
51
              state_[kBufferedBytes] = 0;
149
51
              break;
150
            }
151
          } else {
152
            // Found the first byte of a UTF-8 character. By looking at the
153
            // upper bits we can tell how long the character *should* be.
154
1695
            if ((data[i] & 0xE0) == 0xC0) {
155
497
              state_[kMissingBytes] = 2;
156
1198
            } else if ((data[i] & 0xF0) == 0xE0) {
157
1104
              state_[kMissingBytes] = 3;
158
94
            } else if ((data[i] & 0xF8) == 0xF0) {
159
39
              state_[kMissingBytes] = 4;
160
            } else {
161
              // This lead byte would indicate a character outside of the
162
              // representable range.
163
55
              state_[kBufferedBytes] = 0;
164
55
              break;
165
            }
166
167
1640
            if (BufferedBytes() >= MissingBytes()) {
168
              // Received at least as many trailing bytes as the lead
169
              // character would indicate. In the "==" case, we have valid
170
              // data and don't need to slice anything off;
171
              // in the ">" case, this is invalid UTF-8 anyway.
172
387
              state_[kMissingBytes] = 0;
173
387
              state_[kBufferedBytes] = 0;
174
            }
175
176
1640
            state_[kMissingBytes] -= state_[kBufferedBytes];
177
1640
            break;
178
          }
179
991
        }
180
21191
      } else if (Encoding() == UCS2) {
181
2582
        if ((nread % 2) == 1) {
182
          // We got half a codepoint, and need the second byte of it.
183
1618
          state_[kBufferedBytes] = 1;
184
1618
          state_[kMissingBytes] = 1;
185
964
        } else if ((data[nread - 1] & 0xFC) == 0xD8) {
186
          // Half a split UTF-16 character.
187
11
          state_[kBufferedBytes] = 2;
188
11
          state_[kMissingBytes] = 2;
189
        }
190
18609
      } else if (Encoding() == BASE64) {
191
453
        state_[kBufferedBytes] = nread % 3;
192
453
        if (state_[kBufferedBytes] > 0)
193
337
          state_[kMissingBytes] = 3 - BufferedBytes();
194
      }
195
196
22937
      if (BufferedBytes() > 0) {
197
        // Copy the requested number of buffered bytes from the end of the
198
        // input into the incomplete character buffer.
199
3219
        nread -= BufferedBytes();
200
3219
        *nread_ptr -= BufferedBytes();
201
3219
        memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes());
202
      }
203
204
22937
      if (nread > 0) {
205
41764
        if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body))
206
          return MaybeLocal<String>();
207
      } else {
208
2055
        body = String::Empty(isolate);
209
      }
210
    }
211
212
25087
    if (prepend.IsEmpty()) {
213
23809
      return body;
214
    } else {
215
2556
      return String::Concat(isolate, prepend, body);
216
    }
217
  } else {
218


6384
    CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1);
219
6384
    return MakeString(isolate, data, nread, Encoding());
220
  }
221
}
222
223
192
MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) {
224


192
  if (Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1) {
225
    CHECK_EQ(MissingBytes(), 0);
226
    CHECK_EQ(BufferedBytes(), 0);
227
  }
228
229

192
  if (Encoding() == UCS2 && BufferedBytes() % 2 == 1) {
230
    // Ignore a single trailing byte, like the JS decoder does.
231
68
    state_[kMissingBytes]--;
232
68
    state_[kBufferedBytes]--;
233
  }
234
235
192
  if (BufferedBytes() == 0)
236
67
    return String::Empty(isolate);
237
238
  MaybeLocal<String> ret =
239
      MakeString(isolate,
240
125
                 IncompleteCharacterBuffer(),
241
125
                 BufferedBytes(),
242
375
                 Encoding());
243
244
125
  state_[kMissingBytes] = 0;
245
125
  state_[kBufferedBytes] = 0;
246
247
125
  return ret;
248
}
249
250
namespace {
251
252
31471
void DecodeData(const FunctionCallbackInfo<Value>& args) {
253
  StringDecoder* decoder =
254
31471
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
255
31471
  CHECK_NOT_NULL(decoder);
256
257
62942
  CHECK(args[1]->IsArrayBufferView());
258
62942
  ArrayBufferViewContents<char> content(args[1].As<ArrayBufferView>());
259
31471
  size_t length = content.length();
260
261
  MaybeLocal<String> ret =
262
62942
      decoder->DecodeData(args.GetIsolate(), content.data(), &length);
263
31471
  if (!ret.IsEmpty())
264
62942
    args.GetReturnValue().Set(ret.ToLocalChecked());
265
31471
}
266
267
192
void FlushData(const FunctionCallbackInfo<Value>& args) {
268
  StringDecoder* decoder =
269
192
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
270
192
  CHECK_NOT_NULL(decoder);
271
192
  MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate());
272
192
  if (!ret.IsEmpty())
273
384
    args.GetReturnValue().Set(ret.ToLocalChecked());
274
192
}
275
276
1405
void InitializeStringDecoder(Local<Object> target,
277
                             Local<Value> unused,
278
                             Local<Context> context,
279
                             void* priv) {
280
1405
  Environment* env = Environment::GetCurrent(context);
281
1405
  Isolate* isolate = env->isolate();
282
283
#define SET_DECODER_CONSTANT(name)                                            \
284
  target->Set(context,                                                        \
285
              FIXED_ONE_BYTE_STRING(isolate, #name),                          \
286
              Integer::New(isolate, StringDecoder::name)).FromJust()
287
288
5620
  SET_DECODER_CONSTANT(kIncompleteCharactersStart);
289
5620
  SET_DECODER_CONSTANT(kIncompleteCharactersEnd);
290
5620
  SET_DECODER_CONSTANT(kMissingBytes);
291
5620
  SET_DECODER_CONSTANT(kBufferedBytes);
292
5620
  SET_DECODER_CONSTANT(kEncodingField);
293
5620
  SET_DECODER_CONSTANT(kNumFields);
294
295
1405
  Local<Array> encodings = Array::New(isolate);
296
#define ADD_TO_ENCODINGS_ARRAY(cname, jsname)                                 \
297
  encodings->Set(context,                                                     \
298
                 static_cast<int32_t>(cname),                                 \
299
                 FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust()
300
4215
  ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
301
4215
  ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
302
4215
  ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
303
4215
  ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
304
4215
  ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
305
4215
  ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");
306
4215
  ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1");
307
308
  target->Set(context,
309
              FIXED_ONE_BYTE_STRING(isolate, "encodings"),
310
4215
              encodings).FromJust();
311
312
  target->Set(context,
313
              FIXED_ONE_BYTE_STRING(isolate, "kSize"),
314
5620
              Integer::New(isolate, sizeof(StringDecoder))).FromJust();
315
316
1405
  env->SetMethod(target, "decode", DecodeData);
317
1405
  env->SetMethod(target, "flush", FlushData);
318
1405
}
319
320
}  // anonymous namespace
321
322
}  // namespace node
323
324
4292
NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder,
325
                                   node::InitializeStringDecoder)