§Normal Page

First, we ask for 10 pages; since lazy mapping is used by default, none of them is backed by physical memory yet. A page gets backed by physical memory on the first write to that page.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include <cassert>
#include <cstdio>
#include <cstdlib>

#include <unistd.h>
#include <sys/mman.h>

// Demonstrates demand paging of an anonymous private mapping made of
// normal-sized pages: reserve num_pages pages, then use mincore() to show
// which pages are resident before any write, after writing to the first
// half, after writing to the rest, and after the range is replaced by a
// fresh MAP_FIXED mapping.
//
// Returns 0 on success; exits with status 1 if the page size cannot be
// determined or a mapping cannot be created. Expectations about page
// residency are checked with assert().
int main(void) {
    constexpr int num_pages = 10;
    constexpr int first_batch = 5;  // pages touched by the first round of writes

    unsigned char vec[num_pages];  // mincore() fills one byte per page; bit 0 = resident
    int res;

    // sysconf() returns a long and signals failure with -1, so validate it
    // before converting to size_t.
    const long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        fputs("sysconf(_SC_PAGESIZE) failed\n", stderr);
        exit(1);
    }
    const size_t PS = (size_t)page_size;
    const size_t len = num_pages * PS;

    // fd is -1 because the mapping is anonymous; some implementations
    // require that value when MAP_ANONYMOUS is given.
    void *addr = mmap(nullptr, len, PROT_READ | PROT_WRITE,
                      MAP_NORESERVE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (addr == MAP_FAILED) {
        perror("mmap");
        exit(1);
    }

    printf("Reserved some 4K-pages at: %p\n\n", addr);

    res = mincore(addr, len, vec);
    assert(res == 0);

    puts("Reserved but no physical mem is used");
    for (int i = 0; i < num_pages; ++i) {
        assert((vec[i] & 1) == 0);
        printf("%d", (vec[i] & 1));
    }
    puts("\n");

    // One write per page is enough to make that page resident.
    puts("Write to the the first 5 pages");
    for (int i = 0; i < first_batch; ++i) {
        ((char *)addr)[i * PS] = 1;
    }

    res = mincore(addr, len, vec);
    assert(res == 0);

    for (int i = 0; i < num_pages; ++i) {
        assert((vec[i] & 1) == (i < first_batch));
        printf("%d", (vec[i] & 1));
    }
    puts("\n");

    puts("Write to the rest of pages");
    for (int i = first_batch; i < num_pages; ++i) {
        ((char *)addr)[i * PS] = 1;
    }

    res = mincore(addr, len, vec);
    assert(res == 0);
    for (int i = 0; i < num_pages; ++i) {
        assert((vec[i] & 1) == 1);
    }
    for (int i = 0; i < num_pages; ++i) {
        printf("%d", (vec[i] & 1));
    }
    puts("\n");

    // Mapping the same range again with MAP_FIXED replaces the old mapping,
    // so the pages written above are no longer part of it.
    puts("Overwrite the existing mapping");
    void *new_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
                          MAP_NORESERVE | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    if (new_addr == MAP_FAILED) {
        perror("mmap");
        exit(1);
    }
    assert(new_addr == addr);

    res = mincore(addr, len, vec);
    assert(res == 0);

    puts("Physical mem is released");
    for (int i = 0; i < num_pages; ++i) {
        assert((vec[i] & 1) == 0);
        printf("%d", (vec[i] & 1));
    }
    puts("\n");

    // Release the mapping explicitly instead of relying on process exit.
    res = munmap(addr, len);
    assert(res == 0);

    return 0;
}

Output on my box:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
Reserved some 4K-pages at: 0x7f3133191000

Reserved but no physical mem is used
0000000000

Write to the the first 5 pages
1111100000

Write to the rest of pages
1111111111

Overwrite the existing mapping
Physical mem is released
0000000000

§Large Page

One needs to pre-allocate at least one 2M page.

1
2
3
4
5
$ echo 1 > "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
$ hugeadm --pool-list
Size Minimum Current Maximum Default
2097152 1 1 1 *
1073741824 0 0 0

The story is the same: physical memory is not used until the first access.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include <cassert>
#include <cstdio>
#include <cstdlib>

#include <unistd.h>
#include <sys/mman.h>

// Demonstrates demand paging of a single 2M huge page: map one huge page
// with MAP_HUGETLB, then use mincore() over the first probed_pages
// base-sized pages to show that nothing is resident before the first write
// and that all probed pages are resident after one byte is written.
//
// argc and argv are unused. Returns 0 on success; exits with status 1 if
// the page size cannot be determined or the mapping cannot be created.
// Expectations about page residency are checked with assert().
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    constexpr int probed_pages = 10;  // base-sized pages inspected with mincore()

    int res;
    const size_t ps_2m = (size_t)1 << 21;

    // sysconf() returns a long and signals failure with -1, so validate it
    // before converting to size_t.
    const long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        fputs("sysconf(_SC_PAGESIZE) failed\n", stderr);
        exit(1);
    }
    const size_t PS = (size_t)page_size;

    unsigned char vec[probed_pages];  // mincore() fills one byte per page; bit 0 = resident

    // Reserve one huge-page -- HugePages_Rsvd in /proc/meminfo is incremented.
    // By omitting MAP_NORESERVE, we get the guarantee that a successful mmap
    // entails successful writes to the memory. With MAP_NORESERVE added to
    // the flags, the mmap call may succeed and a later write may still crash
    // because no huge-page is available.
    void *addr = mmap(nullptr, ps_2m * 1,
                      PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

    if (addr == MAP_FAILED) {
        perror("mmap");
        exit(1);
    }

    // Huge-page memory has been pre-allocated but is not tied to our
    // reserved range yet.
    res = mincore(addr, probed_pages * PS, vec);
    assert(res == 0);

    puts("Reserved but no physical mem is used");
    for (int i = 0; i < probed_pages; ++i) {
        assert((vec[i] & 1) == 0);
        printf("%d", (vec[i] & 1));
    }

    puts("\n");

    // A single byte written anywhere in the huge page backs the whole page.
    puts("Write to the first 1 byte");
    ((char *)addr)[0] = 1;

    // Mapping established; every probed base-sized page is now in core.
    res = mincore(addr, probed_pages * PS, vec);
    assert(res == 0);

    for (int i = 0; i < probed_pages; ++i) {
        assert((vec[i] & 1) == 1);
        printf("%d", (vec[i] & 1));
    }

    puts("");

    // Release the mapping explicitly instead of relying on process exit.
    res = munmap(addr, ps_2m * 1);
    assert(res == 0);

    return 0;
}

Output on my box:

1
2
3
4
5
Reserved but no physical mem is used
0000000000

Write to the first 1 byte
1111111111