According to IEEE floating point definition, double uses the following encoding (assuming 64 bits):

1 sign bit + 11 exponent bits + 52 mantissa bits

Some interesting special values are:

* INF: 0 {1}*11 {0}*52 // positive sign, all exponent bits set, mantissa all zero
* -INF: 1 {1}*11 {0}*52 // negative sign, all exponent bits set, mantissa all zero
* Quiet NaN: _ {1}*11 1 {_}*51 // any sign, all exponent bits set, the first mantissa bit set, and the remaining 51 bits are free
* Signaling NaN: _ {1}*11 0 [^{0}*51] // any sign, all exponent bits set, the first mantissa bit unset, and at least one of the remaining 51 bits set

Because a quiet NaN leaves 51 bits free, we can embed additional information inside it, hence the name NaN boxing.

The code is more or less converted from the C++ code snippet at https://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html, with a different treatment of NaN. On my box (Ubuntu 14.04 using clang 3.8), sqrt(-1) produces -nan, and it is treated as a double.

#include <stdio.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <assert.h>
#include <stdlib.h>

// #include <limits.h>
// #include <string.h>
// #include "./tik.h"

/*
 * A single 64-bit cell that can be viewed in three ways.
 *
 * Layout of the double view, most significant bit first:
 *   1 sign bit | 11 exponent bits | 1 quiet/signal bit | 51 significand bits
 */
typedef union {
    double   as_double; /* the cell interpreted as a floating-point number */
    uint64_t bits;      /* the raw bit pattern, used for tagging and tests */
    int32_t  as_int32;  /* the first four bytes of the cell as a signed int
                           (the low half of `bits` on little-endian targets) */
} value_t;

/*
 * Bit patterns that partition the 64-bit value space.
 *
 * max_double: sign bit, all eleven exponent bits and the quiet bit set, the
 *             rest zero; the largest pattern is_double() still accepts.
 * ptr_mask:   bits OR-ed into a boxed pointer and tested by is_ptr().
 * int_mask:   bits OR-ed into a boxed int32 and tested by is_int32().
 */
static const uint64_t max_double = UINT64_C(0xFFF8000000000000);
static const uint64_t ptr_mask   = UINT64_C(0xFFF9000000000000);
static const uint64_t int_mask   = UINT64_C(0xFFFA000000000000);

/*
 * Report whether x is the IEEE negative zero (-0.0).
 *
 * `x == 0.0` is true for +0.0 and -0.0 only (NaN compares unequal to
 * everything), and signbit() then tells the two zeros apart.  Comparing in
 * the floating-point domain avoids truncating x to an integer, which would
 * wrongly accept values such as -0.5 and is undefined for NaN, infinities
 * and magnitudes outside the int range.
 *
 * Returns true only for -0.0.
 */
bool is_neg_zero(double x)
{
    return x == 0.0 && signbit(x);
}

value_t from_double(double x)
{
value_t v;
if (x == (int32_t)x && !is_neg_zero(x)) {
v.bits = (unsigned long) (int32_t)x | int_mask;
return v;
}
v.as_double = x;
return v;
}

/*
 * Box a pointer by OR-ing ptr_mask over its address.
 *
 * The upper 16 bits of the cell carry the tag, so the address must fit in
 * the low 48 bits.  The assertion checks all 16 upper bits rather than only
 * those set in ptr_mask: a stray bit outside ptr_mask would otherwise pass
 * the check and change the tag of the boxed value.
 *
 * p may be NULL.  Returns the boxed value.
 */
value_t from_ptr(void *p)
{
    /* Widen first so the shift is valid even where uintptr_t is 32 bits. */
    const uint64_t addr = (uint64_t)(uintptr_t)p;

    assert((addr >> 48) == 0);
    return (value_t) { .bits = addr | ptr_mask };
}

/*
 * Tell whether v holds a plain double: true when its raw bit pattern does
 * not exceed max_double.
 */
bool is_double(value_t v)
{
    const bool above_double_range = v.bits > max_double;

    return !above_double_range;
}

/*
 * Tell whether v carries the pointer tag: true when every bit of ptr_mask
 * is set in v.bits.
 */
bool is_ptr(value_t v)
{
    const uint64_t tag_bits = v.bits & ptr_mask;

    return tag_bits == ptr_mask;
}

/*
 * Tell whether v carries the int32 tag: true when every bit of int_mask is
 * set in v.bits, i.e. no bit of int_mask is missing from it.
 */
bool is_int32(value_t v)
{
    const uint64_t missing = int_mask & ~v.bits;

    return missing == 0;
}

/*
 * Print which kind of value v holds, together with its payload.
 *
 * The predicates are probed in a fixed order (int32, then double, then
 * pointer) and the first one that matches wins.  A value matching none of
 * them prints "IMPOSSIBLE" and trips an assertion.
 */
void test(value_t v)
{
    if (is_int32(v)) {
        printf("v is int: %d\n", v.as_int32);
    } else if (is_double(v)) {
        printf("v is double: %lf\n", v.as_double);
    } else if (is_ptr(v)) {
        /* Clear the tag bits to recover the address that was boxed. */
        void *unboxed = (void*) (v.bits & ~ptr_mask);

        printf("v is ptr: %p\n", unboxed);
    } else {
        puts("IMPOSSIBLE");
        assert(0);
    }
}

/*
 * Demo driver: boxes a fraction, a whole number, a heap pointer, infinity
 * and a NaN in turn, and prints how each one is classified.
 *
 * Returns 0.
 */
int main(void)
{
    /* Kept in a local so the allocation can be released before exit. */
    void *heap_cell = malloc(1);
    value_t v;

    v = from_double(0.3);
    test(v);
    v = from_double(3);
    test(v);
    v = from_ptr(heap_cell);
    test(v);
    v = from_double(INFINITY);
    test(v);
    v = from_double(sqrt(-1));
    test(v);

    free(heap_cell);
    return 0;
}
v is double: 0.300000
v is int: 3
v is ptr: 0x1d77010
v is double: inf
v is double: -nan

§Reference

https://www.doc.ic.ac.uk/~eedwards/compsys/float/nan.html