GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/large_pages/node_large_page.cc Lines: 18 117 15.4 %
Date: 2020-02-27 22:14:15 Branches: 10 60 16.7 %

Line Branch Exec Source
1
// Copyright (C) 2018 Intel Corporation
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a copy
4
// of this software and associated documentation files (the "Software"),
5
// to deal in the Software without restriction, including without limitation
6
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
7
// and/or sell copies of the Software, and to permit persons to whom
8
// the Software is furnished to do so, subject to the following conditions:
9
//
10
// The above copyright notice and this permission notice shall be included
11
// in all copies or substantial portions of the Software.
12
//
13
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
17
// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19
// OR OTHER DEALINGS IN THE SOFTWARE.
20
//
21
// SPDX-License-Identifier: MIT
22
23
#include "node_large_page.h"
24
25
#include <cerrno>   // NOLINT(build/include)
26
27
// Besides returning ENOTSUP at runtime we do nothing if this define is missing.
28
#if defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
29
#include "util.h"
30
#include "uv.h"
31
32
#include <fcntl.h>  // _O_RDWR
33
#include <sys/types.h>
34
#include <sys/mman.h>
35
#if defined(__FreeBSD__)
36
#include <sys/sysctl.h>
37
#include <sys/user.h>
38
#elif defined(__APPLE__)
39
#include <mach/vm_map.h>
40
#endif
41
#include <unistd.h>  // readlink
42
43
#include <climits>  // PATH_MAX
44
#include <clocale>
45
#include <csignal>
46
#include <cstdio>
47
#include <cstdlib>
48
#include <cstdint>
49
#include <cstring>
50
#include <string>
51
#include <fstream>
52
#include <iostream>
53
#include <sstream>
54
#include <vector>
55
56
// The functions in this file map the text segment of node into 2M pages.
57
// The algorithm is simple
58
// Find the text region of node binary in memory
59
// 1: Examine the /proc/self/maps to determine the currently mapped text
60
// region and obtain the start and end
61
// Modify the start to point to the very beginning of node text segment
62
// (from variable nodetext setup in ld.script)
63
// Align the address of start and end to Large Page Boundaries
64
//
65
// 2: Move the text region to large pages
66
// Map a new area and copy the original code there
67
// Use mmap using the start address with MAP_FIXED so we get exactly the
68
// same virtual address
69
// Use madvise with MADV_HUGEPAGE to use Anonymous 2M Pages
70
// If successful copy the code there and unmap the original region.
71
72
#if defined(__linux__)
73
extern "C" {
74
extern char __executable_start;
75
}  // extern "C"
76
#endif  // defined(__linux__)
77
78
#endif  // defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
79
namespace node {
80
#if defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
81
82
namespace {
83
84
struct text_region {
85
  char* from;
86
  char* to;
87
  int   total_hugepages;
88
  bool  found_text_region;
89
};
90
91
static const size_t hps = 2L * 1024 * 1024;
92
93
static void PrintWarning(const char* warn) {
94
  fprintf(stderr, "Hugepages WARNING: %s\n", warn);
95
}
96
97
static void PrintSystemError(int error) {
98
  PrintWarning(strerror(error));
99
}
100
101
inline uintptr_t hugepage_align_up(uintptr_t addr) {
102
  return (((addr) + (hps) - 1) & ~((hps) - 1));
103
}
104
105
inline uintptr_t hugepage_align_down(uintptr_t addr) {
106
  return ((addr) & ~((hps) - 1));
107
}
108
109
// The format of the maps file is the following
110
// address           perms offset  dev   inode       pathname
111
// 00400000-00452000 r-xp 00000000 08:02 173521      /usr/bin/dbus-daemon
112
// This is also handling the case where the first line is not the binary.
113
114
struct text_region FindNodeTextRegion() {
115
  struct text_region nregion;
116
  nregion.found_text_region = false;
117
#if defined(__linux__)
118
  std::ifstream ifs;
119
  std::string map_line;
120
  std::string permission;
121
  std::string dev;
122
  char dash;
123
  uintptr_t start, end, offset, inode;
124
125
  ifs.open("/proc/self/maps");
126
  if (!ifs) {
127
    PrintWarning("could not open /proc/self/maps");
128
    return nregion;
129
  }
130
131
  while (std::getline(ifs, map_line)) {
132
    std::istringstream iss(map_line);
133
    iss >> std::hex >> start;
134
    iss >> dash;
135
    iss >> std::hex >> end;
136
    iss >> permission;
137
    iss >> offset;
138
    iss >> dev;
139
    iss >> inode;
140
141
    if (inode == 0)
142
      continue;
143
144
    std::string pathname;
145
    iss >> pathname;
146
147
    if (start != reinterpret_cast<uintptr_t>(&__executable_start))
148
      continue;
149
150
    // The next line is our .text section.
151
    if (!std::getline(ifs, map_line))
152
      break;
153
154
    iss = std::istringstream(map_line);
155
    iss >> std::hex >> start;
156
    iss >> dash;
157
    iss >> std::hex >> end;
158
    iss >> permission;
159
160
    if (permission != "r-xp")
161
      break;
162
163
    char* from = reinterpret_cast<char*>(hugepage_align_up(start));
164
    char* to = reinterpret_cast<char*>(hugepage_align_down(end));
165
166
    if (from >= to)
167
      break;
168
169
    size_t size = to - from;
170
    nregion.found_text_region = true;
171
    nregion.from = from;
172
    nregion.to = to;
173
    nregion.total_hugepages = size / hps;
174
175
    break;
176
  }
177
178
  ifs.close();
179
#elif defined(__FreeBSD__)
180
  std::string exename;
181
  {
182
    char selfexe[PATH_MAX];
183
    size_t count = sizeof(selfexe);
184
    if (uv_exepath(selfexe, &count))
185
      return nregion;
186
187
    exename = std::string(selfexe, count);
188
  }
189
190
  size_t numpg;
191
  int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
192
  const size_t miblen = arraysize(mib);
193
  if (sysctl(mib, miblen, nullptr, &numpg, nullptr, 0) == -1) {
194
    return nregion;
195
  }
196
197
  // Enough for struct kinfo_vmentry.
198
  numpg = numpg * 4 / 3;
199
  auto alg = std::vector<char>(numpg);
200
201
  if (sysctl(mib, miblen, alg.data(), &numpg, nullptr, 0) == -1) {
202
    return nregion;
203
  }
204
205
  char* start = alg.data();
206
  char* end = start + numpg;
207
208
  while (start < end) {
209
    kinfo_vmentry* entry = reinterpret_cast<kinfo_vmentry*>(start);
210
    const size_t cursz = entry->kve_structsize;
211
    if (cursz == 0) {
212
      break;
213
    }
214
215
    if (entry->kve_path[0] == '\0') {
216
      continue;
217
    }
218
    bool excmapping = ((entry->kve_protection & KVME_PROT_READ) &&
219
     (entry->kve_protection & KVME_PROT_EXEC));
220
221
    if (!strcmp(exename.c_str(), entry->kve_path) && excmapping) {
222
      char* estart =
223
        reinterpret_cast<char*>(hugepage_align_up(entry->kve_start));
224
      char* eend =
225
        reinterpret_cast<char*>(hugepage_align_down(entry->kve_end));
226
      size_t size = eend - estart;
227
      nregion.found_text_region = true;
228
      nregion.from = estart;
229
      nregion.to = eend;
230
      nregion.total_hugepages = size / hps;
231
      break;
232
    }
233
    start += cursz;
234
  }
235
#elif defined(__APPLE__)
236
  struct vm_region_submap_info_64 map;
237
  mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
238
  vm_address_t addr = 0UL;
239
  vm_size_t size = 0;
240
  natural_t depth = 1;
241
242
  while (true) {
243
    if (vm_region_recurse_64(mach_task_self(), &addr, &size, &depth,
244
                             reinterpret_cast<vm_region_info_64_t>(&map),
245
                             &count) != KERN_SUCCESS) {
246
      break;
247
    }
248
249
    if (map.is_submap) {
250
      depth++;
251
    } else {
252
      char* start = reinterpret_cast<char*>(hugepage_align_up(addr));
253
      char* end = reinterpret_cast<char*>(hugepage_align_down(addr+size));
254
      size_t esize = end - start;
255
256
      if (end > start && (map.protection & VM_PROT_READ) != 0 &&
257
          (map.protection & VM_PROT_EXECUTE) != 0) {
258
        nregion.found_text_region = true;
259
        nregion.from = start;
260
        nregion.to = end;
261
        nregion.total_hugepages = esize / hps;
262
        break;
263
      }
264
265
      addr += size;
266
      size = 0;
267
    }
268
  }
269
#endif
270
  return nregion;
271
}
272
273
#if defined(__linux__)
274
1
bool IsTransparentHugePagesEnabled() {
275
2
  std::ifstream ifs;
276
277
1
  ifs.open("/sys/kernel/mm/transparent_hugepage/enabled");
278
1
  if (!ifs) {
279
    PrintWarning("could not open /sys/kernel/mm/transparent_hugepage/enabled");
280
    return false;
281
  }
282
283
2
  std::string always, madvise;
284
1
  if (ifs.is_open()) {
285
2
    while (ifs >> always >> madvise) {}
286
  }
287
1
  ifs.close();
288
289

1
  return always == "[always]" || madvise == "[madvise]";
290
}
291
#elif defined(__FreeBSD__)
292
static bool IsSuperPagesEnabled() {
293
  // It is enabled by default on amd64.
294
  unsigned int super_pages = 0;
295
  size_t super_pages_length = sizeof(super_pages);
296
  return sysctlbyname("vm.pmap.pg_ps_enabled",
297
                      &super_pages,
298
                      &super_pages_length,
299
                      nullptr,
300
                      0) != -1 &&
301
         super_pages >= 1;
302
}
303
#endif
304
305
}  // End of anonymous namespace
306
307
// Moving the text region to large pages. We need to be very careful.
308
// 1: This function itself should not be moved.
309
// We use gcc attributes
310
// (__section__) to put it outside the ".text" section
311
// (__aligned__) to align it at 2M boundary
312
// (__noline__) to not inline this function
313
// 2: This function should not call any function(s) that might be moved.
314
// a. map a new area and copy the original code there
315
// b. mmap using the start address with MAP_FIXED so we get exactly
316
//    the same virtual address (except on macOS).
317
// c. madvise with MADV_HUGEPAGE
318
// d. If successful copy the code there and unmap the original region
319
int
320
#if !defined(__APPLE__)
321
__attribute__((__section__(".lpstub")))
322
#else
323
__attribute__((__section__("__TEXT,__lpstub")))
324
#endif
325
__attribute__((__aligned__(hps)))
326
__attribute__((__noinline__))
327
MoveTextRegionToLargePages(const text_region& r) {
328
  void* nmem = nullptr;
329
  void* tmem = nullptr;
330
  int ret = 0;
331
332
  size_t size = r.to - r.from;
333
  void* start = r.from;
334
335
  // Allocate temporary region preparing for copy.
336
  nmem = mmap(nullptr, size,
337
              PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
338
  if (nmem == MAP_FAILED) {
339
    PrintSystemError(errno);
340
    return -1;
341
  }
342
343
  memcpy(nmem, r.from, size);
344
345
#if defined(__linux__)
346
// We already know the original page is r-xp
347
// (PROT_READ, PROT_EXEC, MAP_PRIVATE)
348
// We want PROT_WRITE because we are writing into it.
349
// We want it at the fixed address and we use MAP_FIXED.
350
  tmem = mmap(start, size,
351
              PROT_READ | PROT_WRITE | PROT_EXEC,
352
              MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0);
353
  if (tmem == MAP_FAILED) {
354
    PrintSystemError(errno);
355
    return -1;
356
  }
357
358
  ret = madvise(tmem, size, 14 /* MADV_HUGEPAGE */);
359
  if (ret == -1) {
360
    PrintSystemError(errno);
361
    ret = munmap(tmem, size);
362
    if (ret == -1) {
363
      PrintSystemError(errno);
364
    }
365
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
366
    return -1;
367
  }
368
  memcpy(start, nmem, size);
369
#elif defined(__FreeBSD__)
370
  tmem = mmap(start, size,
371
              PROT_READ | PROT_WRITE | PROT_EXEC,
372
              MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED |
373
              MAP_ALIGNED_SUPER, -1 , 0);
374
  if (tmem == MAP_FAILED) {
375
    PrintSystemError(errno);
376
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
377
    return -1;
378
  }
379
#elif defined(__APPLE__)
380
  // There is not enough room to reserve the mapping close
381
  // to the region address, so we are content to give a hint
382
  // without forcing the new address to be close to it.
383
  // We explicitly give all permissions since we plan
384
  // to write into it.
385
  tmem = mmap(start, size,
386
              PROT_READ | PROT_WRITE | PROT_EXEC,
387
              MAP_PRIVATE | MAP_ANONYMOUS,
388
              VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
389
  if (tmem == MAP_FAILED) {
390
    PrintSystemError(errno);
391
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
392
    return -1;
393
  }
394
  memcpy(tmem, nmem, size);
395
  ret = mprotect(start, size, PROT_READ | PROT_WRITE | PROT_EXEC);
396
  if (ret == -1) {
397
    PrintSystemError(errno);
398
    ret = munmap(tmem, size);
399
    if (ret == -1) {
400
      PrintSystemError(errno);
401
    }
402
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
403
    return -1;
404
  }
405
  memcpy(start, tmem, size);
406
#endif
407
408
  ret = mprotect(start, size, PROT_READ | PROT_EXEC);
409
  if (ret == -1) {
410
    PrintSystemError(errno);
411
    ret = munmap(tmem, size);
412
    if (ret == -1) {
413
      PrintSystemError(errno);
414
    }
415
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
416
    return -1;
417
  }
418
  if (-1 == munmap(nmem, size)) PrintSystemError(errno);
419
  return ret;
420
}
421
#endif  // defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
422
423
// This is the primary API called from main.
424
1
int MapStaticCodeToLargePages() {
425
#if defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
426
1
  bool have_thp = false;
427
#if defined(__linux__)
428
1
  have_thp = IsTransparentHugePagesEnabled();
429
#elif defined(__FreeBSD__)
430
  have_thp = IsSuperPagesEnabled();
431
#elif defined(__APPLE__)
432
  // pse-36 flag is present in recent mac x64 products.
433
  have_thp = true;
434
#endif
435
1
  if (!have_thp)
436
1
    return EACCES;
437
438
  struct text_region r = FindNodeTextRegion();
439
  if (r.found_text_region == false)
440
    return ENOENT;
441
442
#if defined(__FreeBSD__)
443
  if (r.from < reinterpret_cast<void*>(&MoveTextRegionToLargePages))
444
    return -1;
445
#endif
446
447
  return MoveTextRegionToLargePages(r);
448
#else
449
  return ENOTSUP;
450
#endif
451
}
452
453
1
const char* LargePagesError(int status) {
454

1
  switch (status) {
455
    case ENOTSUP:
456
      return "Mapping to large pages is not supported.";
457
458
    case EACCES:
459
1
      return "Large pages are not enabled.";
460
461
    case ENOENT:
462
      return "failed to find text region";
463
464
    case -1:
465
      return "Mapping code to large pages failed. Reverting to default page "
466
          "size.";
467
468
    case 0:
469
      return "OK";
470
471
    default:
472
      return "Unknown error";
473
  }
474
}
475
476

12606
}  // namespace node