GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/string_decoder.cc Lines: 132 138 95.7 %
Date: 2019-02-26 22:23:30 Branches: 90 110 81.8 %

Line Branch Exec Source
1
#include "env-inl.h"
2
#include "node_buffer.h"
3
#include "string_bytes.h"
4
#include "string_decoder-inl.h"
5
6
using v8::Array;
7
using v8::Context;
8
using v8::FunctionCallbackInfo;
9
using v8::Integer;
10
using v8::Isolate;
11
using v8::Local;
12
using v8::MaybeLocal;
13
using v8::Object;
14
using v8::String;
15
using v8::Value;
16
17
namespace node {
18
19
namespace {
20
21
30440
MaybeLocal<String> MakeString(Isolate* isolate,
22
                              const char* data,
23
                              size_t length,
24
                              enum encoding encoding) {
25
  Local<Value> error;
26
  MaybeLocal<Value> ret;
27
30440
  if (encoding == UTF8) {
28
    return String::NewFromUtf8(
29
        isolate,
30
        data,
31
        v8::NewStringType::kNormal,
32
20299
        length);
33
  } else {
34
    ret = StringBytes::Encode(
35
        isolate,
36
        data,
37
        length,
38
        encoding,
39
10141
        &error);
40
  }
41
42
10141
  if (ret.IsEmpty()) {
43
    CHECK(!error.IsEmpty());
44
    isolate->ThrowException(error);
45
  }
46
47
  DCHECK(ret.IsEmpty() || ret.ToLocalChecked()->IsString());
48
20282
  return ret.FromMaybe(Local<Value>()).As<String>();
49
}
50
51
}  // anonymous namespace
52
53
54
31523
MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
55
                                             const char* data,
56
                                             size_t* nread_ptr) {
57
  Local<String> prepend, body;
58
59
31523
  size_t nread = *nread_ptr;
60
61


31523
  if (Encoding() == UTF8 || Encoding() == UCS2 || Encoding() == BASE64) {
62
    // See if we want bytes to finish a character from the previous
63
    // chunk; if so, copy the new bytes to the missing bytes buffer
64
    // and create a small string from it that is to be prepended to the
65
    // main body.
66
24953
    if (MissingBytes() > 0) {
67
      // There are never more bytes missing than the pre-calculated maximum.
68
3350
      CHECK_LE(MissingBytes() + BufferedBytes(),
69
               kIncompleteCharactersEnd);
70
3350
      if (Encoding() == UTF8) {
71
        // For UTF-8, we need special treatment to align with the V8 decoder:
72
        // If an incomplete character is found at a chunk boundary, we use
73
        // its remainder and pass it to V8 as-is.
74

3260
        for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
75
1827
          if ((data[i] & 0xC0) != 0x80) {
76
            // This byte is not a continuation byte even though it should have
77
            // been one. We stop decoding of the incomplete character at this
78
            // point (but still use the rest of the incomplete bytes from this
79
            // chunk) and assume that the new, unexpected byte starts a new one.
80
95
            state_[kMissingBytes] = 0;
81
95
            memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
82
95
            state_[kBufferedBytes] += i;
83
95
            data += i;
84
95
            nread -= i;
85
95
            break;
86
          }
87
        }
88
      }
89
90
      size_t found_bytes =
91
3350
          std::min(nread, static_cast<size_t>(MissingBytes()));
92
6700
      memcpy(IncompleteCharacterBuffer() + BufferedBytes(),
93
             data,
94
6700
             found_bytes);
95
      // Adjust the two buffers.
96
3350
      data += found_bytes;
97
3350
      nread -= found_bytes;
98
99
3350
      state_[kMissingBytes] -= found_bytes;
100
3350
      state_[kBufferedBytes] += found_bytes;
101
102
3350
      if (LIKELY(MissingBytes() == 0)) {
103
        // If no more bytes are missing, create a small string that we
104
        // will later prepend.
105
5998
        if (!MakeString(isolate,
106
2999
                        IncompleteCharacterBuffer(),
107
2999
                        BufferedBytes(),
108
14995
                        Encoding()).ToLocal(&prepend)) {
109
          return MaybeLocal<String>();
110
        }
111
112
2999
        *nread_ptr += BufferedBytes();
113
        // No more buffered bytes.
114
2999
        state_[kBufferedBytes] = 0;
115
      }
116
    }
117
118
    // It could be that trying to finish the previous chunk already
119
    // consumed all data that we received in this chunk.
120
24953
    if (UNLIKELY(nread == 0)) {
121
2546
      body = !prepend.IsEmpty() ? prepend : String::Empty(isolate);
122
2147
      prepend = Local<String>();
123
    } else {
124
      // If not, that means is no character left to finish at this point.
125
      DCHECK_EQ(MissingBytes(), 0);
126
      DCHECK_EQ(BufferedBytes(), 0);
127
128
      // See whether there is a character that we may have to cut off and
129
      // finish when receiving the next chunk.
130

22806
      if (Encoding() == UTF8 && data[nread - 1] & 0x80) {
131
        // This is UTF-8 encoded data and we ended on a non-ASCII UTF-8 byte.
132
        // This means we'll need to figure out where the character to which
133
        // the byte belongs begins.
134
2772
        for (size_t i = nread - 1; ; --i) {
135
          DCHECK_LT(i, nread);
136
2772
          state_[kBufferedBytes]++;
137
2772
          if ((data[i] & 0xC0) == 0x80) {
138
            // This byte does not start a character (a "trailing" byte).
139

1050
            if (state_[kBufferedBytes] >= 4 || i == 0) {
140
              // We either have more then 4 trailing bytes (which means
141
              // the current character would not be inside the range for
142
              // valid Unicode, and in particular cannot be represented
143
              // through JavaScript's UTF-16-based approach to strings), or the
144
              // current buffer does not contain the start of an UTF-8 character
145
              // at all. Either way, this is invalid UTF8 and we can just
146
              // let the engine's decoder handle it.
147
51
              state_[kBufferedBytes] = 0;
148
51
              break;
149
            }
150
          } else {
151
            // Found the first byte of a UTF-8 character. By looking at the
152
            // upper bits we can tell how long the character *should* be.
153
1722
            if ((data[i] & 0xE0) == 0xC0) {
154
504
              state_[kMissingBytes] = 2;
155
1218
            } else if ((data[i] & 0xF0) == 0xE0) {
156
1106
              state_[kMissingBytes] = 3;
157
112
            } else if ((data[i] & 0xF8) == 0xF0) {
158
40
              state_[kMissingBytes] = 4;
159
            } else {
160
              // This lead byte would indicate a character outside of the
161
              // representable range.
162
72
              state_[kBufferedBytes] = 0;
163
72
              break;
164
            }
165
166
1650
            if (BufferedBytes() >= MissingBytes()) {
167
              // Received more or exactly as many trailing bytes than the lead
168
              // character would indicate. In the "==" case, we have valid
169
              // data and don't need to slice anything off;
170
              // in the ">" case, this is invalid UTF-8 anyway.
171
378
              state_[kMissingBytes] = 0;
172
378
              state_[kBufferedBytes] = 0;
173
            }
174
175
1650
            state_[kMissingBytes] -= state_[kBufferedBytes];
176
1650
            break;
177
          }
178
999
        }
179
21033
      } else if (Encoding() == UCS2) {
180
2604
        if ((nread % 2) == 1) {
181
          // We got half a codepoint, and need the second byte of it.
182
1623
          state_[kBufferedBytes] = 1;
183
1623
          state_[kMissingBytes] = 1;
184
981
        } else if ((data[nread - 1] & 0xFC) == 0xD8) {
185
          // Half a split UTF-16 character.
186
13
          state_[kBufferedBytes] = 2;
187
13
          state_[kMissingBytes] = 2;
188
        }
189
18429
      } else if (Encoding() == BASE64) {
190
407
        state_[kBufferedBytes] = nread % 3;
191
407
        if (state_[kBufferedBytes] > 0)
192
288
          state_[kMissingBytes] = 3 - BufferedBytes();
193
      }
194
195
22806
      if (BufferedBytes() > 0) {
196
        // Copy the requested number of buffered bytes from the end of the
197
        // input into the incomplete character buffer.
198
3196
        nread -= BufferedBytes();
199
3196
        *nread_ptr -= BufferedBytes();
200
3196
        memcpy(IncompleteCharacterBuffer(), data + nread, BufferedBytes());
201
      }
202
203
22806
      if (nread > 0) {
204
41498
        if (!MakeString(isolate, data, nread, Encoding()).ToLocal(&body))
205
          return MaybeLocal<String>();
206
      } else {
207
2057
        body = String::Empty(isolate);
208
      }
209
    }
210
211
24953
    if (prepend.IsEmpty()) {
212
23702
      return body;
213
    } else {
214
2502
      return String::Concat(isolate, prepend, body);
215
    }
216
  } else {
217


6570
    CHECK(Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1);
218
6570
    return MakeString(isolate, data, nread, Encoding());
219
  }
220
}
221
222
197
MaybeLocal<String> StringDecoder::FlushData(Isolate* isolate) {
223


197
  if (Encoding() == ASCII || Encoding() == HEX || Encoding() == LATIN1) {
224
    CHECK_EQ(MissingBytes(), 0);
225
    CHECK_EQ(BufferedBytes(), 0);
226
  }
227
228

197
  if (Encoding() == UCS2 && BufferedBytes() % 2 == 1) {
229
    // Ignore a single trailing byte, like the JS decoder does.
230
76
    state_[kMissingBytes]--;
231
76
    state_[kBufferedBytes]--;
232
  }
233
234
197
  if (BufferedBytes() == 0)
235
75
    return String::Empty(isolate);
236
237
  MaybeLocal<String> ret =
238
      MakeString(isolate,
239
122
                 IncompleteCharacterBuffer(),
240
122
                 BufferedBytes(),
241
366
                 Encoding());
242
243
122
  state_[kMissingBytes] = 0;
244
122
  state_[kBufferedBytes] = 0;
245
246
122
  return ret;
247
}
248
249
namespace {
250
251
31523
void DecodeData(const FunctionCallbackInfo<Value>& args) {
252
  StringDecoder* decoder =
253
31523
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
254
31523
  CHECK_NOT_NULL(decoder);
255
31523
  size_t nread = Buffer::Length(args[1]);
256
  MaybeLocal<String> ret =
257
63046
      decoder->DecodeData(args.GetIsolate(), Buffer::Data(args[1]), &nread);
258
31523
  if (!ret.IsEmpty())
259
63046
    args.GetReturnValue().Set(ret.ToLocalChecked());
260
31523
}
261
262
197
void FlushData(const FunctionCallbackInfo<Value>& args) {
263
  StringDecoder* decoder =
264
197
      reinterpret_cast<StringDecoder*>(Buffer::Data(args[0]));
265
197
  CHECK_NOT_NULL(decoder);
266
197
  MaybeLocal<String> ret = decoder->FlushData(args.GetIsolate());
267
197
  if (!ret.IsEmpty())
268
394
    args.GetReturnValue().Set(ret.ToLocalChecked());
269
197
}
270
271
1404
void InitializeStringDecoder(Local<Object> target,
272
                             Local<Value> unused,
273
                             Local<Context> context,
274
                             void* priv) {
275
1404
  Environment* env = Environment::GetCurrent(context);
276
1404
  Isolate* isolate = env->isolate();
277
278
#define SET_DECODER_CONSTANT(name)                                            \
279
  target->Set(context,                                                        \
280
              FIXED_ONE_BYTE_STRING(isolate, #name),                          \
281
              Integer::New(isolate, StringDecoder::name)).FromJust()
282
283
5616
  SET_DECODER_CONSTANT(kIncompleteCharactersStart);
284
5616
  SET_DECODER_CONSTANT(kIncompleteCharactersEnd);
285
5616
  SET_DECODER_CONSTANT(kMissingBytes);
286
5616
  SET_DECODER_CONSTANT(kBufferedBytes);
287
5616
  SET_DECODER_CONSTANT(kEncodingField);
288
5616
  SET_DECODER_CONSTANT(kNumFields);
289
290
1404
  Local<Array> encodings = Array::New(isolate);
291
#define ADD_TO_ENCODINGS_ARRAY(cname, jsname)                                 \
292
  encodings->Set(context,                                                     \
293
                 static_cast<int32_t>(cname),                                 \
294
                 FIXED_ONE_BYTE_STRING(isolate, jsname)).FromJust()
295
4212
  ADD_TO_ENCODINGS_ARRAY(ASCII, "ascii");
296
4212
  ADD_TO_ENCODINGS_ARRAY(UTF8, "utf8");
297
4212
  ADD_TO_ENCODINGS_ARRAY(BASE64, "base64");
298
4212
  ADD_TO_ENCODINGS_ARRAY(UCS2, "utf16le");
299
4212
  ADD_TO_ENCODINGS_ARRAY(HEX, "hex");
300
4212
  ADD_TO_ENCODINGS_ARRAY(BUFFER, "buffer");
301
4212
  ADD_TO_ENCODINGS_ARRAY(LATIN1, "latin1");
302
303
  target->Set(context,
304
              FIXED_ONE_BYTE_STRING(isolate, "encodings"),
305
4212
              encodings).FromJust();
306
307
  target->Set(context,
308
              FIXED_ONE_BYTE_STRING(isolate, "kSize"),
309
5616
              Integer::New(isolate, sizeof(StringDecoder))).FromJust();
310
311
1404
  env->SetMethod(target, "decode", DecodeData);
312
1404
  env->SetMethod(target, "flush", FlushData);
313
1404
}
314
315
}  // anonymous namespace
316
317
}  // namespace node
318
319
4282
NODE_MODULE_CONTEXT_AWARE_INTERNAL(string_decoder,
320
                                   node::InitializeStringDecoder)