In concurrent programming, it’s common to have a global array in which each slot is used by only one thread. To avoid false sharing, it’s best to place each element on its own cache line. The following code snippet assumes a 64-byte cache line.

The placement of the attribute is not so obvious, so here’s a breakdown of all the (sensible) options.

  1. beginning of the typedef: applies to the new type
  2. before {: applies to the struct (preferred)
  3. after }: applies to the struct (same as 2)
  4. end of the typedef: applies to the new type (same as 1)
  5. in the body of the struct: applies to the struct

The last placement is a bit hacky, because it exploits the fact that a struct needs to satisfy the strictest alignment among all of its members; on the other hand, it puts the attribute on its own separate line, which makes it stand out. LOL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#include <stdint.h>
#include <stdio.h>

/*
 * Placement 1: the attribute is written at the very beginning of the
 * declaration, before `typedef`.
 *
 * The payload is one pointer plus 57 bytes. In the sample run shown after
 * this program, sizeof(begin_t) printed 72, i.e. with this placement the
 * size was NOT rounded up to a multiple of 64.
 */
__attribute__ ((aligned (64)))
typedef struct {
void *p;
uint8_t x[57];
} begin_t;

/*
 * Placement 2: the attribute sits between the `struct` keyword and the
 * opening brace, so it is attached to the struct type itself.
 *
 * In the sample run shown after this program, sizeof(before_t) printed 128,
 * i.e. the size was rounded up to a multiple of the requested 64-byte
 * alignment.
 */
typedef struct __attribute__ ((aligned (64))) {
void *p;
uint8_t x[57];
} before_t;

/*
 * Placement 3: the attribute follows the closing brace of the struct body
 * and precedes the typedef name.
 *
 * In the sample run shown after this program, sizeof(after_t) printed 128,
 * the same result as placing the attribute before the opening brace.
 */
typedef struct {
void *p;
uint8_t x[57];
} __attribute__ ((aligned (64))) after_t;

/*
 * Placement 4: the attribute comes after the typedef name, at the very end
 * of the declaration.
 *
 * In the sample run shown after this program, sizeof(end_t) printed 72,
 * i.e. with this placement the size was NOT rounded up to a multiple of 64.
 */
typedef struct {
void *p;
uint8_t x[57];
} end_t __attribute__ ((aligned (64)));

/*
 * Placement 5: the attribute is written inside the struct body, on its own
 * line ahead of the first member `p`, so it applies to that member.
 *
 * A struct must be at least as strictly aligned as its most strictly aligned
 * member, so the 64-byte requirement carries over to the whole struct. In
 * the sample run shown after this program, sizeof(element_t) printed 128.
 */
typedef struct {
__attribute__ ((aligned (64)))
void *p;
uint8_t x[57];
} element_t;

/*
 * Global array of 64 element_t slots. Per the accompanying text, each slot
 * is meant to be used by a single thread. Nothing in the visible code reads
 * or writes this array.
 */
element_t per_thread[64];

/*
 * Print sizeof for each of the five struct variants, one value per line, in
 * declaration order: begin_t, before_t, after_t, end_t, element_t.
 * Always returns 0; argc and argv are not used.
 */
int main(int argc, char *argv[])
{
    /* sizeof is evaluated at compile time, so the table is constant. */
    const size_t widths[] = {
        sizeof(begin_t),
        sizeof(before_t),
        sizeof(after_t),
        sizeof(end_t),
        sizeof(element_t),
    };
    const size_t count = sizeof widths / sizeof widths[0];

    for (size_t i = 0; i < count; ++i) {
        printf("%zu\n", widths[i]);
    }
    return 0;
}
1
2
3
4
5
72
128
128
72
128