Compiler Barrier

Without cpu_relax(), the loop becomes an infinite loop, because the compiler can’t see x being changed in the loop body. With the inline assembly, namely its "memory" clobber, the compiler relinquishes the assumption that x is kept intact, and reloads it on each iteration. In addition, the compiler is smart enough to flush only globally visible variables.

Let’s illustrate it by viewing the generated assembly code for the following C code. The assembly is obtained using clang -S -O test.c.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#include <stdint.h>
#include <stdio.h>

/* Shared flag polled by f(); f() spins for as long as it reads as 0. */
unsigned int x;

/*
 * Spin-wait hint (x86 "pause") that also acts as a compiler barrier: the
 * "memory" clobber tells the compiler that memory may have changed across
 * this statement, so values cached in registers must be written back
 * before it and reloaded after it.
 *
 * Spelled __asm__/__volatile__ rather than asm/volatile so the macro also
 * builds under strict ISO modes (e.g. -std=c11), where the plain asm
 * keyword is not recognized.
 */
#define cpu_relax() __asm__ __volatile__("pause" ::: "memory")

/*
 * Spin until x becomes non-zero, adding 3 to y on every iteration, and
 * then store y into x.
 *
 * Two declarations of y are provided so that the generated code can be
 * compared: enable the commented-out automatic declaration instead of the
 * static one to see how the barrier treats a variable that is not visible
 * outside the function.
 */
void f(void)
{
    // unsigned int y;
    static unsigned int y;

    while (x == 0) {
        y += 3;
        cpu_relax(); /* forces x to be re-read on the next loop test */
    }
    x = y;
}

/*
 * Empty entry point: it exists only so that the file builds as a complete
 * program. f() is never called from here.
 */
int main(int argc, char **argv)
{
    /* The command line is not used. */
    (void)argc;
    (void)argv;

    return 0;
}

In this case, y is a non-static local variable (the commented-out declaration `unsigned int y;` is used instead of the static one), so it’s kept in %eax without being flushed to memory.

1
...
# local variable
# f() compiled with y as a non-static local: y lives only in %eax and is
# never written to or read from memory around the barrier.
f:                                      # @f
	.cfi_startproc
# BB#0:
	# First evaluation of the loop condition: skip the loop if x != 0.
	cmpl	$0, x(%rip)
                                        # implicit-def: %EAX
	jne	.LBB0_2
	.p2align	4, 0x90
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
	# y += 3, done entirely in the register.
	addl	$3, %eax
	# Inline asm from cpu_relax() sits between #APP and #NO_APP.
	#APP
	pause
	#NO_APP
	# x is loaded from memory again for every test of the loop condition.
	cmpl	$0, x(%rip)
	je	.LBB0_1
.LBB0_2:
	# x = y
	movl	%eax, x(%rip)
	retq
...

In this case, y is a static variable, whose storage is static, so its value needs to be flushed and re-read around the barrier.

1
...
# static local variable
# f() compiled with y as a static local (symbol f.y): y is stored to memory
# before the barrier and loaded back after it on every iteration.
f:                                      # @f
	.cfi_startproc
# BB#0:
	# Load the current value of y from its static storage.
	movl	f.y(%rip), %eax
	# First evaluation of the loop condition: skip the loop if x != 0.
	cmpl	$0, x(%rip)
	jne	.LBB0_2
	.p2align	4, 0x90
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
	# y += 3, then write y back to memory before the inline asm.
	addl	$3, %eax
	movl	%eax, f.y(%rip)
	# Inline asm from cpu_relax() sits between #APP and #NO_APP.
	#APP
	pause
	#NO_APP
	# Re-test x from memory, then reload y from memory after the barrier.
	# The je below still acts on the cmpl result: movl leaves the flags
	# unchanged.
	cmpl	$0, x(%rip)
	movl	f.y(%rip), %eax
	je	.LBB0_1
.LBB0_2:
	# x = y
	movl	%eax, x(%rip)
	retq
...

Reference