GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/large_pages/node_large_page.cc Lines: 18 103 17.5 %
Date: 2020-09-06 22:14:11 Branches: 10 68 14.7 %

Line Branch Exec Source
1
// Copyright (C) 2018 Intel Corporation
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a copy
4
// of this software and associated documentation files (the "Software"),
5
// to deal in the Software without restriction, including without limitation
6
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
7
// and/or sell copies of the Software, and to permit persons to whom
8
// the Software is furnished to do so, subject to the following conditions:
9
//
10
// The above copyright notice and this permission notice shall be included
11
// in all copies or substantial portions of the Software.
12
//
13
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
17
// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19
// OR OTHER DEALINGS IN THE SOFTWARE.
20
//
21
// SPDX-License-Identifier: MIT
22
23
// The functions in this file map the .text section of Node.js into 2MB pages.
24
// They perform the following steps:
25
//
26
// 1: Find the Node.js binary's `.text` section in memory. This is done below in
27
//    `FindNodeTextRegion`. It is accomplished in a platform-specific way. On
28
//    Linux and FreeBSD, `dl_iterate_phdr(3)` is used. When the region is found,
29
//    it is "trimmed" as follows:
30
//    * Modify the start to point to the very beginning of the Node.js `.text`
31
//      section (from symbol `__node_text_start` declared in node_text_start.S).
32
//    * Possibly modify the end to account for the `lpstub` section which
33
//      contains `MoveTextRegionToLargePages`, the function we do not wish to
34
//      move (see below).
35
//    * Align the address of the start to its nearest higher large page
36
//      boundary.
37
//    * Align the address of the end to its nearest lower large page boundary.
38
//
39
// 2: Move the text region to large pages. This is done below in
40
//    `MoveTextRegionToLargePages`. We need to be very careful:
41
//    a) `MoveTextRegionToLargePages` itself should not be moved.
42
//       We use gcc attributes
43
//       (__section__) to put it outside the `.text` section,
44
//       (__aligned__) to align it at the 2M boundary, and
45
//       (__noinline__) to not inline this function.
46
//    b) `MoveTextRegionToLargePages` should not call any function(s) that might
47
//       be moved.
48
//    To move the .text section, perform the following steps:
49
//      * Map a new, temporary area and copy the original code there.
50
//      * Use mmap using the start address with MAP_FIXED so we get exactly the
51
//        same virtual address (except on OSX). On platforms other than Linux,
52
//        use mmap flags to request hugepages.
53
//      * On Linux use madvise with MADV_HUGEPAGE to use anonymous 2MB pages.
54
//      * If successful copy the code to the newly mapped area and protect it to
55
//        be readable and executable.
56
//      * Unmap the temporary area.
57
58
#include "node_large_page.h"
59
60
#include <cerrno>   // NOLINT(build/include)
61
62
// Besides returning ENOTSUP at runtime we do nothing if this define is missing.
63
#if defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
64
#include "debug_utils-inl.h"
65
66
#if defined(__linux__) || defined(__FreeBSD__)
67
#if defined(__linux__)
68
#ifndef _GNU_SOURCE
69
#define _GNU_SOURCE
70
#endif  // ifndef _GNU_SOURCE
71
#elif defined(__FreeBSD__)
72
#include "uv.h"  // uv_exepath
73
#endif  // defined(__linux__)
74
#include <link.h>
75
#endif  // defined(__linux__) || defined(__FreeBSD__)
76
77
#include <sys/types.h>
78
#include <sys/mman.h>
79
#if defined(__FreeBSD__)
80
#include <sys/sysctl.h>
81
#elif defined(__APPLE__)
82
#include <mach/vm_map.h>
83
#endif
84
85
#include <climits>  // PATH_MAX
86
#include <cstdlib>
87
#include <cstdint>
88
#include <cstring>
89
#include <string>
90
#include <fstream>
91
92
#if defined(__linux__) || defined(__FreeBSD__)
93
extern "C" {
94
// This symbol must be declared weak because this file becomes part of all
95
// Node.js targets (like node_mksnapshot, node_mkcodecache, and cctest) and
96
// those files do not supply the symbol.
97
extern char __attribute__((weak)) __node_text_start;
98
extern char __start_lpstub;
99
}  // extern "C"
100
#endif  // defined(__linux__) || defined(__FreeBSD__)
101
102
#endif  // defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
103
namespace node {
104
#if defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
105
106
namespace {
107
108
// Result of the text-region search performed by FindNodeTextRegion().
struct text_region {
  // First byte of the range to remap.
  char* from{nullptr};
  // One past the last byte of the range to remap.
  char* to{nullptr};
  // True only when `from`/`to` were filled in with a usable range.
  bool found_text_region{false};
};
113
114
// Huge page size in bytes (2 MiB). Used by the alignment helpers, for the page
// count computation, and as the alignment of MoveTextRegionToLargePages.
static const size_t hps = 2L * 1024 * 1024;
115
116
template <typename... Args>
117
inline void Debug(std::string fmt, Args&&... args) {
118
  node::Debug(&per_process::enabled_debug_list,
119
              DebugCategory::HUGEPAGES,
120
              (std::string("Hugepages info: ") + fmt).c_str(),
121
              std::forward<Args>(args)...);
122
}
123
124
// Writes `warn` to stderr, prefixed with "Hugepages WARNING: " and terminated
// by a newline.
inline void PrintWarning(const char* warn) {
125
  fprintf(stderr, "Hugepages WARNING: %s\n", warn);
126
}
127
128
// Prints the strerror() text for the errno-style code `error` as a hugepages
// warning on stderr.
inline void PrintSystemError(int error) {
129
  PrintWarning(strerror(error));
130
}
131
132
inline uintptr_t hugepage_align_up(uintptr_t addr) {
133
  return (((addr) + (hps) - 1) & ~((hps) - 1));
134
}
135
136
inline uintptr_t hugepage_align_down(uintptr_t addr) {
137
  return ((addr) & ~((hps) - 1));
138
}
139
140
#if defined(__linux__) || defined(__FreeBSD__)
141
#if defined(__FreeBSD__)
142
#ifndef ElfW
143
#define ElfW(name) Elf_##name
144
#endif  // ifndef ElfW
145
#endif  // defined(__FreeBSD__)
146
147
struct dl_iterate_params {
148
  uintptr_t start = 0;
149
  uintptr_t end = 0;
150
  uintptr_t reference_sym = reinterpret_cast<uintptr_t>(&__node_text_start);
151
  std::string exename;
152
};
153
154
int FindMapping(struct dl_phdr_info* info, size_t, void* data) {
155
  auto dl_params = static_cast<dl_iterate_params*>(data);
156
  if (dl_params->exename == std::string(info->dlpi_name)) {
157
    for (int idx = 0; idx < info->dlpi_phnum; idx++) {
158
      const ElfW(Phdr)* phdr = &info->dlpi_phdr[idx];
159
      if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X)) {
160
        uintptr_t start = info->dlpi_addr + phdr->p_vaddr;
161
        uintptr_t end = start + phdr->p_memsz;
162
163
        if (dl_params->reference_sym >= start &&
164
            dl_params->reference_sym <= end) {
165
          dl_params->start = start;
166
          dl_params->end = end;
167
          return 1;
168
        }
169
      }
170
    }
171
  }
172
  return 0;
173
}
174
#endif  // defined(__linux__) || defined(__FreeBSD__)
175
176
// Locates the range of executable code that should be remapped and returns it
// as a text_region. `found_text_region` is set only when a range containing at
// least one whole huge page was found; otherwise the default-initialized
// region (null pointers, found_text_region == false) is returned.
struct text_region FindNodeTextRegion() {
177
  struct text_region nregion;
178
#if defined(__linux__) || defined(__FreeBSD__)
179
  dl_iterate_params dl_params;
180
  // Start address of the `lpstub` section; used below to trim the range.
  uintptr_t lpstub_start = reinterpret_cast<uintptr_t>(&__start_lpstub);
181
182
#if defined(__FreeBSD__)
183
  // On FreeBSD we need the name of the binary, because `dl_iterate_phdr` does
184
  // not pass in an empty string as the `dlpi_name` of the binary but rather its
185
  // absolute path.
186
  {
187
    char selfexe[PATH_MAX];
188
    size_t count = sizeof(selfexe);
189
    // A non-zero result from uv_exepath() is treated as failure: give up and
    // return the empty region.
    if (uv_exepath(selfexe, &count))
190
      return nregion;
191
    dl_params.exename = std::string(selfexe, count);
192
  }
193
#endif  // defined(__FreeBSD__)
194
195
  // FindMapping returns 1 once it has stored the bounds of the executable
  // segment that contains `reference_sym`.
  if (dl_iterate_phdr(FindMapping, &dl_params) == 1) {
196
    Debug("start: %p - sym: %p - end: %p\n",
197
          reinterpret_cast<void*>(dl_params.start),
198
          reinterpret_cast<void*>(dl_params.reference_sym),
199
          reinterpret_cast<void*>(dl_params.end));
200
201
    // Trim the start of the range to the reference symbol.
    dl_params.start = dl_params.reference_sym;
202
    // If the lpstub section lies inside the range, end the range where lpstub
    // begins so that lpstub itself is excluded.
    if (lpstub_start > dl_params.start && lpstub_start <= dl_params.end) {
203
      Debug("Trimming end for lpstub: %p\n",
204
            reinterpret_cast<void*>(lpstub_start));
205
      dl_params.end = lpstub_start;
206
    }
207
208
    if (dl_params.start < dl_params.end) {
209
      // Shrink the range inwards to huge page boundaries.
      char* from = reinterpret_cast<char*>(hugepage_align_up(dl_params.start));
210
      char* to = reinterpret_cast<char*>(hugepage_align_down(dl_params.end));
211
      Debug("Aligned range is %p - %p\n", from, to);
212
      if (from < to) {
213
        size_t pagecount = (to - from) / hps;
214
        if (pagecount > 0) {
215
          nregion.found_text_region = true;
216
          nregion.from = from;
217
          nregion.to = to;
218
        }
219
      }
220
    }
221
  }
222
#elif defined(__APPLE__)
223
  struct vm_region_submap_info_64 map;
224
  mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
225
  vm_address_t addr = 0UL;
226
  vm_size_t size = 0;
227
  natural_t depth = 1;
228
229
  // Walk the task's VM regions from address 0 and stop at the first region
  // that is readable and executable and still non-empty after huge page
  // alignment, or when vm_region_recurse_64() stops reporting success.
  while (true) {
230
    if (vm_region_recurse_64(mach_task_self(), &addr, &size, &depth,
231
                             reinterpret_cast<vm_region_info_64_t>(&map),
232
                             &count) != KERN_SUCCESS) {
233
      break;
234
    }
235
236
    if (map.is_submap) {
237
      // Query again at the same address, one nesting level deeper.
      depth++;
238
    } else {
239
      char* start = reinterpret_cast<char*>(hugepage_align_up(addr));
240
      char* end = reinterpret_cast<char*>(hugepage_align_down(addr+size));
241
242
      if (end > start && (map.protection & VM_PROT_READ) != 0 &&
243
          (map.protection & VM_PROT_EXECUTE) != 0) {
244
        nregion.found_text_region = true;
245
        nregion.from = start;
246
        nregion.to = end;
247
        break;
248
      }
249
250
      // Not a candidate: continue with the region that follows this one.
      addr += size;
251
      size = 0;
252
    }
253
  }
254
#endif
255
  // Logs 0 pages when nothing was found, because both pointers are still null.
  Debug("Found %d huge pages\n", (nregion.to - nregion.from) / hps);
256
  return nregion;
257
}
258
259
#if defined(__linux__)
260
1
bool IsTransparentHugePagesEnabled() {
261
2
  std::ifstream ifs;
262
263
1
  ifs.open("/sys/kernel/mm/transparent_hugepage/enabled");
264
1
  if (!ifs) {
265
    PrintWarning("could not open /sys/kernel/mm/transparent_hugepage/enabled");
266
    return false;
267
  }
268
269
2
  std::string always, madvise;
270
1
  if (ifs.is_open()) {
271
2
    while (ifs >> always >> madvise) {}
272
  }
273
1
  ifs.close();
274
275

1
  return always == "[always]" || madvise == "[madvise]";
276
}
277
#elif defined(__FreeBSD__)
278
bool IsSuperPagesEnabled() {
279
  // It is enabled by default on amd64.
280
  unsigned int super_pages = 0;
281
  size_t super_pages_length = sizeof(super_pages);
282
  return sysctlbyname("vm.pmap.pg_ps_enabled",
283
                      &super_pages,
284
                      &super_pages_length,
285
                      nullptr,
286
                      0) != -1 &&
287
         super_pages >= 1;
288
}
289
#endif
290
291
// Functions in this class must always be inlined because they must end up in
292
// the `lpstub` section rather than the `.text` section.
293
class MemoryMapPointer {
294
 public:
295
  FORCE_INLINE explicit MemoryMapPointer() {}
296
  FORCE_INLINE bool operator==(void* rhs) const { return mem_ == rhs; }
297
  FORCE_INLINE void* mem() const { return mem_; }
298
  MemoryMapPointer(const MemoryMapPointer&) = delete;
299
  MemoryMapPointer(MemoryMapPointer&&) = delete;
300
  void operator= (const MemoryMapPointer&) = delete;
301
  void operator= (const MemoryMapPointer&&) = delete;
302
  FORCE_INLINE void Reset(void* start,
303
                          size_t size,
304
                          int prot,
305
                          int flags,
306
                          int fd = -1,
307
                          size_t offset = 0) {
308
    mem_ = mmap(start, size, prot, flags, fd, offset);
309
    size_ = size;
310
  }
311
  FORCE_INLINE void Reset() {
312
    mem_ = nullptr;
313
    size_ = 0;
314
  }
315
  FORCE_INLINE ~MemoryMapPointer() {
316
    if (mem_ == nullptr) return;
317
    if (mem_ == MAP_FAILED) return;
318
    if (munmap(mem_, size_) == 0) return;
319
    PrintSystemError(errno);
320
  }
321
322
 private:
323
  size_t size_ = 0;
324
  void* mem_ = nullptr;
325
};
326
327
}  // End of anonymous namespace
328
329
// Re-maps the address range [r.from, r.to) and copies the original code back
// into it. The code is first saved in a temporary anonymous mapping, then the
// range is re-created in a platform-specific way, the saved code is copied
// back, and the range is finally protected read + execute.
//
// The function is placed in its own `lpstub` section, aligned to `hps`, and
// never inlined.
//
// Returns 0 on success. On failure it prints strerror(errno) through
// PrintSystemError() and returns -1.
//
// NOTE(review): on the Linux and FreeBSD paths, a failure after the MAP_FIXED
// mapping succeeded leaves `tmem` pointing at `start`, so the MemoryMapPointer
// destructor will munmap() that range. This looks like it would unmap the
// text region itself -- confirm.
int
330
#if !defined(__APPLE__)
331
__attribute__((__section__("lpstub")))
332
#else
333
__attribute__((__section__("__TEXT,__lpstub")))
334
#endif
335
__attribute__((__aligned__(hps)))
336
__attribute__((__noinline__))
337
MoveTextRegionToLargePages(const text_region& r) {
338
  // nmem holds the backup copy of the code; tmem is the replacement mapping.
  MemoryMapPointer nmem;
339
  MemoryMapPointer tmem;
340
  void* start = r.from;
341
  size_t size = r.to - r.from;
342
343
  // Allocate a temporary region and back up the code we will re-map.
344
  nmem.Reset(nullptr, size,
345
             PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS);
346
  if (nmem.mem() == MAP_FAILED) goto fail;
347
  memcpy(nmem.mem(), r.from, size);
348
349
#if defined(__linux__)
350
// We already know the original page is r-xp
351
// (PROT_READ, PROT_EXEC, MAP_PRIVATE).
352
// We want PROT_WRITE because we are writing into it.
353
// We want it at the fixed address, so we use MAP_FIXED.
354
  tmem.Reset(start, size,
355
             PROT_READ | PROT_WRITE | PROT_EXEC,
356
             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED);
357
  if (tmem.mem() == MAP_FAILED) goto fail;
358
  // The advice value is passed as the literal 14 rather than by name.
  if (madvise(tmem.mem(), size, 14 /* MADV_HUGEPAGE */) == -1) goto fail;
359
  // Restore the saved code into the fresh mapping at the original address.
  memcpy(start, nmem.mem(), size);
360
#elif defined(__FreeBSD__)
361
  // Same as Linux, but huge pages are requested through an mmap flag.
  tmem.Reset(start, size,
362
             PROT_READ | PROT_WRITE | PROT_EXEC,
363
             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED |
364
             MAP_ALIGNED_SUPER);
365
  if (tmem.mem() == MAP_FAILED) goto fail;
366
  memcpy(start, nmem.mem(), size);
367
#elif defined(__APPLE__)
368
  // There is not enough room to reserve the mapping close
369
  // to the region address, so we settle for giving a hint
370
  // without forcing the new mapping to be placed there.
371
  // We explicitly grant all permissions since we plan
372
  // to write into it.
373
  // VM_FLAGS_SUPERPAGE_SIZE_2MB is passed in the `fd` argument of Reset().
  tmem.Reset(start, size,
374
             PROT_READ | PROT_WRITE | PROT_EXEC,
375
             MAP_PRIVATE | MAP_ANONYMOUS,
376
             VM_FLAGS_SUPERPAGE_SIZE_2MB);
377
  if (tmem.mem() == MAP_FAILED) goto fail;
378
  memcpy(tmem.mem(), nmem.mem(), size);
379
  // Make the original range writable, then copy the code back into it.
  if (mprotect(start, size, PROT_READ | PROT_WRITE | PROT_EXEC) == -1)
380
    goto fail;
381
  memcpy(start, tmem.mem(), size);
382
#endif
383
384
  // Drop write permission again: the code range ends up read + execute.
  if (mprotect(start, size, PROT_READ | PROT_EXEC) == -1) goto fail;
385
386
  // We need not `munmap(tmem, size)` on success: Reset() clears the pointer so
  // that the destructor leaves the mapping in place.
387
  tmem.Reset();
388
  return 0;
389
fail:
390
  PrintSystemError(errno);
391
  return -1;
392
}
393
#endif  // defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
394
395
// This is the primary API called from main.
396
1
// Primary entry point: tries to move the static code of the binary onto large
// pages.
//
// Returns ENOTSUP when large code page support is compiled out, EACCES when
// the platform reports huge pages as unavailable, ENOENT when no suitable text
// region is found, and otherwise the result of MoveTextRegionToLargePages()
// (0 on success, -1 on failure).
int MapStaticCodeToLargePages() {
#if defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
#if defined(__linux__)
  const bool have_thp = IsTransparentHugePagesEnabled();
#elif defined(__FreeBSD__)
  const bool have_thp = IsSuperPagesEnabled();
#elif defined(__APPLE__)
  // pse-36 flag is present in recent mac x64 products.
  const bool have_thp = true;
#else
  const bool have_thp = false;
#endif
  if (!have_thp) return EACCES;

  const struct text_region region = FindNodeTextRegion();
  if (!region.found_text_region) return ENOENT;

  return MoveTextRegionToLargePages(region);
#else
  return ENOTSUP;
#endif
}
419
420
1
// Maps a status code returned by MapStaticCodeToLargePages() to a
// human-readable message. The returned pointer refers to a string literal.
// Codes that are not recognized yield "Unknown error".
const char* LargePagesError(int status) {
  if (status == 0) return "OK";

  if (status == -1) {
    return "Mapping code to large pages failed. Reverting to default page "
        "size.";
  }

  if (status == ENOTSUP) return "Mapping to large pages is not supported.";
  if (status == EACCES) return "Large pages are not enabled.";
  if (status == ENOENT) return "failed to find text region";

  return "Unknown error";
}
442
443

13419
}  // namespace node