GCC Code Coverage Report
Directory: ../ Exec Total Coverage
File: /home/iojs/build/workspace/node-test-commit-linux-coverage-daily/nodes/benchmark/out/../src/large_pages/node_large_page.cc Lines: 12 108 11.1 %
Date: 2020-02-19 22:14:06 Branches: 8 52 15.4 %

Line Branch Exec Source
1
// Copyright (C) 2018 Intel Corporation
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a copy
4
// of this software and associated documentation files (the "Software"),
5
// to deal in the Software without restriction, including without limitation
6
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
7
// and/or sell copies of the Software, and to permit persons to whom
8
// the Software is furnished to do so, subject to the following conditions:
9
//
10
// The above copyright notice and this permission notice shall be included
11
// in all copies or substantial portions of the Software.
12
//
13
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
17
// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19
// OR OTHER DEALINGS IN THE SOFTWARE.
20
//
21
// SPDX-License-Identifier: MIT
22
23
#include "node_large_page.h"
24
#include "util.h"
25
#include "uv.h"
26
27
#include <fcntl.h>  // _O_RDWR
28
#include <sys/types.h>
29
#include <sys/mman.h>
30
#if defined(__FreeBSD__)
31
#include <sys/sysctl.h>
32
#include <sys/user.h>
33
#elif defined(__APPLE__)
34
#include <mach/vm_map.h>
35
#endif
36
#include <unistd.h>  // readlink
37
38
#include <cerrno>   // NOLINT(build/include)
39
#include <climits>  // PATH_MAX
40
#include <clocale>
41
#include <csignal>
42
#include <cstdio>
43
#include <cstdlib>
44
#include <cstdint>
45
#include <cstring>
46
#include <string>
47
#include <fstream>
48
#include <iostream>
49
#include <sstream>
50
#include <vector>
51
52
// The functions in this file map the text segment of node into 2M pages.
53
// The algorithm is simple
54
// Find the text region of node binary in memory
55
// 1: Examine the /proc/self/maps to determine the currently mapped text
56
// region and obtain the start and end
57
// Modify the start to point to the very beginning of node text segment
58
// (from variable nodetext setup in ld.script)
59
// Align the address of start and end to Large Page Boundaries
60
//
61
// 2: Move the text region to large pages
62
// Map a new area and copy the original code there
63
// Use mmap using the start address with MAP_FIXED so we get exactly the
64
// same virtual address
65
// Use madvise with MADV_HUGEPAGE to use Anonymous 2M Pages
66
// If successful copy the code there and unmap the original region.
67
68
#if defined(__linux__)
69
// Symbol defined outside this file (presumably by the linker - TODO confirm).
// Only its address is used: FindNodeTextRegion() compares it against the
// start address of each /proc/self/maps entry.
extern "C" {
70
extern char __executable_start;
71
}  // extern "C"
72
#endif  // defined(__linux__)
73
74
namespace node {
75
76
// Result of searching for the executable's text mapping.
// All members have defaults so that a "not found" result never carries
// indeterminate values.
struct text_region {
  // Start of the range, rounded up to a huge page boundary.
  char* from = nullptr;
  // End of the range (exclusive), rounded down to a huge page boundary.
  char* to = nullptr;
  // Number of whole huge pages between `from` and `to`.
  int   total_hugepages = 0;
  // True only when a usable range was located; the other members are
  // meaningful only in that case.
  bool  found_text_region = false;
};
82
83
// Huge page size in bytes (2 MiB).
static const size_t hps = static_cast<size_t>(2) * 1024 * 1024;
84
85
// Writes `warn` to stderr as a single line prefixed with
// "Hugepages WARNING: ".
static void PrintWarning(const char* warn) {
  std::fprintf(stderr, "Hugepages WARNING: %s\n", warn);
}
88
89
static void PrintSystemError(int error) {
90
  PrintWarning(strerror(error));
91
}
92
93
inline uintptr_t hugepage_align_up(uintptr_t addr) {
94
  return (((addr) + (hps) - 1) & ~((hps) - 1));
95
}
96
97
inline uintptr_t hugepage_align_down(uintptr_t addr) {
98
  return ((addr) & ~((hps) - 1));
99
}
100
101
// The format of the maps file is the following
102
// address           perms offset  dev   inode       pathname
103
// 00400000-00452000 r-xp 00000000 08:02 173521      /usr/bin/dbus-daemon
104
// This is also handling the case where the first line is not the binary.
105
106
static struct text_region FindNodeTextRegion() {
107
  struct text_region nregion;
108
  nregion.found_text_region = false;
109
#if defined(__linux__)
110
  std::ifstream ifs;
111
  std::string map_line;
112
  std::string permission;
113
  std::string dev;
114
  char dash;
115
  uintptr_t start, end, offset, inode;
116
117
  ifs.open("/proc/self/maps");
118
  if (!ifs) {
119
    PrintWarning("could not open /proc/self/maps");
120
    return nregion;
121
  }
122
123
  while (std::getline(ifs, map_line)) {
124
    std::istringstream iss(map_line);
125
    iss >> std::hex >> start;
126
    iss >> dash;
127
    iss >> std::hex >> end;
128
    iss >> permission;
129
    iss >> offset;
130
    iss >> dev;
131
    iss >> inode;
132
133
    if (inode == 0)
134
      continue;
135
136
    std::string pathname;
137
    iss >> pathname;
138
139
    if (start != reinterpret_cast<uintptr_t>(&__executable_start))
140
      continue;
141
142
    // The next line is our .text section.
143
    if (!std::getline(ifs, map_line))
144
      break;
145
146
    iss = std::istringstream(map_line);
147
    iss >> std::hex >> start;
148
    iss >> dash;
149
    iss >> std::hex >> end;
150
    iss >> permission;
151
152
    if (permission != "r-xp")
153
      break;
154
155
    char* from = reinterpret_cast<char*>(hugepage_align_up(start));
156
    char* to = reinterpret_cast<char*>(hugepage_align_down(end));
157
158
    if (from >= to)
159
      break;
160
161
    size_t size = to - from;
162
    nregion.found_text_region = true;
163
    nregion.from = from;
164
    nregion.to = to;
165
    nregion.total_hugepages = size / hps;
166
167
    break;
168
  }
169
170
  ifs.close();
171
#elif defined(__FreeBSD__)
172
  std::string exename;
173
  {
174
    char selfexe[PATH_MAX];
175
    size_t count = sizeof(selfexe);
176
    if (uv_exepath(selfexe, &count))
177
      return nregion;
178
179
    exename = std::string(selfexe, count);
180
  }
181
182
  size_t numpg;
183
  int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
184
  const size_t miblen = arraysize(mib);
185
  if (sysctl(mib, miblen, nullptr, &numpg, nullptr, 0) == -1) {
186
    return nregion;
187
  }
188
189
  // Enough for struct kinfo_vmentry.
190
  numpg = numpg * 4 / 3;
191
  auto alg = std::vector<char>(numpg);
192
193
  if (sysctl(mib, miblen, alg.data(), &numpg, nullptr, 0) == -1) {
194
    return nregion;
195
  }
196
197
  char* start = alg.data();
198
  char* end = start + numpg;
199
200
  while (start < end) {
201
    kinfo_vmentry* entry = reinterpret_cast<kinfo_vmentry*>(start);
202
    const size_t cursz = entry->kve_structsize;
203
    if (cursz == 0) {
204
      break;
205
    }
206
207
    if (entry->kve_path[0] == '\0') {
208
      continue;
209
    }
210
    bool excmapping = ((entry->kve_protection & KVME_PROT_READ) &&
211
     (entry->kve_protection & KVME_PROT_EXEC));
212
213
    if (!strcmp(exename.c_str(), entry->kve_path) && excmapping) {
214
      char* estart =
215
        reinterpret_cast<char*>(hugepage_align_up(entry->kve_start));
216
      char* eend =
217
        reinterpret_cast<char*>(hugepage_align_down(entry->kve_end));
218
      size_t size = eend - estart;
219
      nregion.found_text_region = true;
220
      nregion.from = estart;
221
      nregion.to = eend;
222
      nregion.total_hugepages = size / hps;
223
      break;
224
    }
225
    start += cursz;
226
  }
227
#elif defined(__APPLE__)
228
  struct vm_region_submap_info_64 map;
229
  mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
230
  vm_address_t addr = 0UL;
231
  vm_size_t size = 0;
232
  natural_t depth = 1;
233
234
  while (true) {
235
    if (vm_region_recurse_64(mach_task_self(), &addr, &size, &depth,
236
                             reinterpret_cast<vm_region_info_64_t>(&map),
237
                             &count) != KERN_SUCCESS) {
238
      break;
239
    }
240
241
    if (map.is_submap) {
242
      depth++;
243
    } else {
244
      char* start = reinterpret_cast<char*>(hugepage_align_up(addr));
245
      char* end = reinterpret_cast<char*>(hugepage_align_down(addr+size));
246
      size_t esize = end - start;
247
248
      if (end > start && (map.protection & VM_PROT_READ) != 0 &&
249
          (map.protection & VM_PROT_EXECUTE) != 0) {
250
        nregion.found_text_region = true;
251
        nregion.from = start;
252
        nregion.to = end;
253
        nregion.total_hugepages = esize / hps;
254
        break;
255
      }
256
257
      addr += size;
258
      size = 0;
259
    }
260
  }
261
#endif
262
  return nregion;
263
}
264
265
#if defined(__linux__)
266
1
static bool IsTransparentHugePagesEnabled() {
267
2
  std::ifstream ifs;
268
269
1
  ifs.open("/sys/kernel/mm/transparent_hugepage/enabled");
270
1
  if (!ifs) {
271
    PrintWarning("could not open /sys/kernel/mm/transparent_hugepage/enabled");
272
    return false;
273
  }
274
275
2
  std::string always, madvise;
276
1
  if (ifs.is_open()) {
277
2
    while (ifs >> always >> madvise) {}
278
  }
279
1
  ifs.close();
280
281

1
  return always == "[always]" || madvise == "[madvise]";
282
}
283
#elif defined(__FreeBSD__)
284
static bool IsSuperPagesEnabled() {
285
  // It is enabled by default on amd64.
286
  unsigned int super_pages = 0;
287
  size_t super_pages_length = sizeof(super_pages);
288
  return sysctlbyname("vm.pmap.pg_ps_enabled",
289
                      &super_pages,
290
                      &super_pages_length,
291
                      nullptr,
292
                      0) != -1 &&
293
         super_pages >= 1;
294
}
295
#endif
296
297
// Moving the text region to large pages. We need to be very careful.
298
// 1: This function itself should not be moved.
299
// We use gcc attributes
300
// (__section__) to put it outside the ".text" section
301
// (__aligned__) to align it at 2M boundary
302
// (__noinline__) to not inline this function
303
// 2: This function should not call any function(s) that might be moved.
304
// a. map a new area and copy the original code there
305
// b. mmap using the start address with MAP_FIXED so we get exactly
306
//    the same virtual address (except on macOS).
307
// c. madvise with MADV_HUGEPAGE
308
// d. If successful copy the code there and unmap the original region
309
int
310
#if !defined(__APPLE__)
311
__attribute__((__section__(".lpstub")))
312
#else
313
__attribute__((__section__("__TEXT,__lpstub")))
314
#endif
315
__attribute__((__aligned__(hps)))
316
__attribute__((__noinline__))
317
MoveTextRegionToLargePages(const text_region& r) {
318
  void* nmem = nullptr;
319
  void* tmem = nullptr;
320
  int ret = 0;
321
322
  size_t size = r.to - r.from;
323
  void* start = r.from;
324
325
  // Allocate temporary region preparing for copy.
326
  nmem = mmap(nullptr, size,
327
              PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
328
  if (nmem == MAP_FAILED) {
329
    PrintSystemError(errno);
330
    return -1;
331
  }
332
333
  memcpy(nmem, r.from, size);
334
335
#if defined(__linux__)
336
// We already know the original page is r-xp
337
// (PROT_READ, PROT_EXEC, MAP_PRIVATE)
338
// We want PROT_WRITE because we are writing into it.
339
// We want it at the fixed address and we use MAP_FIXED.
340
  tmem = mmap(start, size,
341
              PROT_READ | PROT_WRITE | PROT_EXEC,
342
              MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0);
343
  if (tmem == MAP_FAILED) {
344
    PrintSystemError(errno);
345
    return -1;
346
  }
347
348
  ret = madvise(tmem, size, 14 /* MADV_HUGEPAGE */);
349
  if (ret == -1) {
350
    PrintSystemError(errno);
351
    ret = munmap(tmem, size);
352
    if (ret == -1) {
353
      PrintSystemError(errno);
354
    }
355
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
356
    return -1;
357
  }
358
  memcpy(start, nmem, size);
359
#elif defined(__FreeBSD__)
360
  tmem = mmap(start, size,
361
              PROT_READ | PROT_WRITE | PROT_EXEC,
362
              MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED |
363
              MAP_ALIGNED_SUPER, -1 , 0);
364
  if (tmem == MAP_FAILED) {
365
    PrintSystemError(errno);
366
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
367
    return -1;
368
  }
369
#elif defined(__APPLE__)
370
  // There is not enough room to reserve the mapping close
371
  // to the region address so we content to give a hint
372
  // without forcing the new address being closed to.
373
  // We explicitally gives all permission since we plan
374
  // to write into it.
375
  tmem = mmap(start, size,
376
              PROT_READ | PROT_WRITE | PROT_EXEC,
377
              MAP_PRIVATE | MAP_ANONYMOUS,
378
              VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
379
  if (tmem == MAP_FAILED) {
380
    PrintSystemError(errno);
381
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
382
    return -1;
383
  }
384
  memcpy(tmem, nmem, size);
385
  ret = mprotect(start, size, PROT_READ | PROT_WRITE | PROT_EXEC);
386
  if (ret == -1) {
387
    PrintSystemError(errno);
388
    ret = munmap(tmem, size);
389
    if (ret == -1) {
390
      PrintSystemError(errno);
391
    }
392
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
393
    return -1;
394
  }
395
  memcpy(start, tmem, size);
396
#endif
397
398
  ret = mprotect(start, size, PROT_READ | PROT_EXEC);
399
  if (ret == -1) {
400
    PrintSystemError(errno);
401
    ret = munmap(tmem, size);
402
    if (ret == -1) {
403
      PrintSystemError(errno);
404
    }
405
    if (-1 == munmap(nmem, size)) PrintSystemError(errno);
406
    return -1;
407
  }
408
  if (-1 == munmap(nmem, size)) PrintSystemError(errno);
409
  return ret;
410
}
411
412
// This is the primary API called from main.
413
int MapStaticCodeToLargePages() {
414
  struct text_region r = FindNodeTextRegion();
415
  if (r.found_text_region == false) {
416
    PrintWarning("failed to find text region");
417
    return -1;
418
  }
419
420
#if defined(__FreeBSD__)
421
  if (r.from < reinterpret_cast<void*>(&MoveTextRegionToLargePages))
422
    return -1;
423
#endif
424
425
  return MoveTextRegionToLargePages(r);
426
}
427
428
1
bool IsLargePagesEnabled() {
429
#if defined(__linux__)
430
1
  return IsTransparentHugePagesEnabled();
431
#elif defined(__FreeBSD__)
432
  return IsSuperPagesEnabled();
433
#elif defined(__APPLE__)
434
  // pse-36 flag is present in recent mac x64 products.
435
  return true;
436
#endif
437
}
438
439

12558
}  // namespace node