I came across this commit while working on OpenJDK. The whole thing was beyond me at first sight, so here is a simplified version that captures the idiom.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#include <cstdio>
#include <iostream>
#include <type_traits>

using namespace std;

/// Abstract root of the closure hierarchy: declares the single virtual
/// operation `f()` that concrete closures implement.
class BaseClosure {
public:
  /// Operation performed by the closure; has no body here and must be
  /// overridden by a concrete subclass.
  virtual void f() = 0;

  // Polymorphic base: a virtual destructor makes destroying a derived closure
  // through a `BaseClosure*` well-defined. It is declared after f() on purpose
  // so that f() remains the first virtual function declared in the class.
  virtual ~BaseClosure() = default;
};

class AClosure : public BaseClosure {
public:
virtual void f() {
puts("B");
}
};

/// Leaf closure that adds nothing of its own: it inherits `f()` from AClosure,
/// so `&BClosure::f` names a member function of AClosure.
class BClosure : public AClosure {};

// Dispatch helper showing how a virtual call can be turned into a direct one.
//
// Template parameters of the private helpers (all deduced from the arguments):
//   CL - static type of the closure pointer handed to the public `iterate`.
//   U  - root of the type hierarchy; `BaseClosure` in this example.
//   T  - the class in which `&CL::f` is found, i.e. the first class with a
//        `void f()` member when searching from `CL` up to `U`.
class O {
#if 0
  // Baseline variant: always an ordinary (virtual) call through the pointer.
  template <typename T, typename U, typename CL>
  void iterate(void (T::*)(), void (U::*)(), CL* closure) {
    closure->f();
  }
#else
  // Selected when T == U: `f` is only declared in the root class, so there is
  // no more specific implementation to name; make an ordinary call.
  template <typename T, typename U, typename CL>
  typename std::enable_if<std::is_same<T, U>::value>::type
  iterate(void (T::*)(), void (U::*)(), CL* closure) {
    closure->f();
  }

  // Selected when T != U: the qualified call names T::f explicitly, which
  // bypasses virtual dispatch and calls that implementation directly.
  template <typename T, typename U, typename CL>
  typename std::enable_if<!std::is_same<T, U>::value>::type
  iterate(void (T::*)(), void (U::*)(), CL* closure) {
    closure->T::f();
  }
#endif

public:
  // Entry point. Passing `&CL::f` and `&BaseClosure::f` lets the private
  // helpers deduce T and U from the member-pointer types; the pointer values
  // themselves are never used.
  template <typename CL>
  void iterate(CL* t) {
    iterate(&CL::f, &BaseClosure::f, t);
  }
};

// Drives the example: runs `O::iterate` on the given closure. The generated
// code for this function is what the article compares between the two builds.
void myfunc(BClosure* cl) {
  O().iterate(cl);
}

By flipping the #if switch, we can control whether the call to f is devirtualized.

1
2
$ clang++ -O -S test.cc -o slow.s # with #if 1
$ clang++ -O -S test.cc -o fast.s # with #if 0

Comparing slow.s with fast.s, we can see that the key difference lies in the body of myfunc. In the fast case, the call to f is resolved at compile time and AClosure::f is inlined, so myfunc reduces to a direct tail call to puts; in the slow case, it is an indirect call through the vtable.

1
2
3
# slow.s
movq (%rdi), %rax
jmpq *(%rax) # TAILCALL
1
2
3
# fast.s
movl $.L.str, %edi
jmp puts # TAILCALL