Implementing a ping-pong benchmark in Go, C++, and Java.
The earlier 2022 ping-pong post compared Erlang, C++, and Java.
All programs below perform 20,000 ping-pong handshakes. Measurements are medians of five runs.
Go
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
| package main
import ( "fmt" "sync" "time" )
func t1(loop int, ch chan int, wg *sync.WaitGroup) { defer wg.Done()
ch <- 1
for { x := <- ch if x == loop { break } ch <- (x+1) } }
func t2(loop int, ch chan int, wg *sync.WaitGroup) { defer wg.Done()
for { x := <- ch ch <- x if x == loop { break } }
}
func main() { var wg sync.WaitGroup
var ch = make(chan int)
var loop = 20_000
start := time.Now()
wg.Add(2) go t1(loop, ch, &wg) go t2(loop, ch, &wg) wg.Wait()
elapsed := time.Since(start)
fmt.Println("Time (ms): ", elapsed.Milliseconds()) }
|
Running on one CPU and on two logical CPUs from the same physical core:
1 2 3 4
| $ taskset -c 0 go run pingpong.go
Time (ms): 12
$ taskset -c 0,6 go run pingpong.go
Time (ms): 10
|
C++
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
| #include <iostream> #include <thread> #include <semaphore> #include <chrono>
#include <sys/resource.h>
using namespace std;
class MBox { binary_semaphore s{0}; public: void put(string msg) { s.release(); }
void get() { s.acquire(); } };
// One mailbox per direction: main() signals pong_mbox and waits on ping_mbox,
// while pong_runnable() waits on pong_mbox and signals ping_mbox.
MBox ping_mbox;
MBox pong_mbox;

// Number of loop iterations each side performs.
constexpr int max_count{20000};
// Body of the "pong" worker thread: max_count times, wait for a signal on
// pong_mbox and answer with a signal on ping_mbox.
void pong_runnable() {
    int remaining = max_count;
    while (remaining-- > 0) {
        pong_mbox.get();
        ping_mbox.put("Pong");
    }
}
int main() { thread pong_worker(pong_runnable);
auto start = chrono::steady_clock::now();
for (auto i = 0; i < max_count; ++i) { pong_mbox.put("Ping"); ping_mbox.get(); }
auto end = chrono::steady_clock::now(); pong_worker.join();
auto time_ms = chrono::duration_cast<chrono::microseconds>(end - start).count() / 1000.0;
cout << "Elapsed time: " << time_ms << " ms" << endl;
return 0; }
|
1 2 3 4 5
| $ clang++ -O -std=c++20 pingpong.cpp
$ taskset -c 0 ./a.out
Elapsed time: 79.802 ms
$ taskset -c 0,6 ./a.out
Elapsed time: 10.722 ms
|
Java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
| import java.util.concurrent.Exchanger; import java.util.concurrent.locks.*; import java.util.function.*;
class pingpong { final static boolean use_vthread = true; final static int max_count = 20000;
final static Exchanger<String> exchanger = new Exchanger<>();
static void send_msg(String msg) { }
static Thread launch_pong() { Runnable r = () -> { try { for (int i = 0; i < max_count; i++) { exchanger.exchange("Pong"); send_msg("Pong"); } } catch (InterruptedException e) { throw new RuntimeException(e); } };
Thread t;
if (use_vthread) { t = Thread.startVirtualThread(r); } else { t = new Thread(r); t.start(); }
return t; }
static void run_ping() { Runnable r = () -> { try { for (int i = 0; i < max_count; i++) { send_msg("Ping"); exchanger.exchange("Ping"); } } catch (InterruptedException e) { throw new RuntimeException(e); } };
if (use_vthread) { try { Thread.startVirtualThread(r).join(); } catch (InterruptedException e) { e.printStackTrace(); } } else { r.run(); } }
static void warmup() throws InterruptedException { for (var i = 0; i < 200; ++i) { var t = launch_pong(); run_ping(); t.join(); } }
public static void main(String[] args) throws InterruptedException { warmup();
var t = launch_pong();
long start = System.nanoTime(); run_ping(); t.join(); long end = System.nanoTime();
long timeElapsed = end - start; System.out.printf("Elapsed time: %.3f ms\n", timeElapsed / 1_000_000.0); } }
|
Using virtual threads:
1 2 3 4
| $ taskset -c 0 java pingpong.java
Elapsed time: 7.615 ms
$ taskset -c 0,6 java pingpong.java
Elapsed time: 1.261 ms
|
Using OS threads (use_vthread = false):
1 2 3 4
| $ taskset -c 0 java pingpong.java
Elapsed time: 62.011 ms
$ taskset -c 0,6 java pingpong.java
Elapsed time: 1.191 ms
|
Conclusion
Median of five runs:
| Lang/Runtime | Elapsed time on one CPU (ms) | Elapsed time on 2 logical CPUs (ms) |
| --- | --- | --- |
| Go | 12 | 10 |
| C++ | 79.802 | 10.722 |
| Java | 62.011 | 1.191 |
| Java (vthread) | 7.615 | 1.261 |
- On one CPU, Java virtual threads are fastest; OS-thread C++ and Java are much slower.
- On two logical CPUs, both Java variants are fastest, while C++ closes the gap with Go.
ENV
1 2 3 4 5 6 7
| Go : 1.26
Clang : 19
Java : 26
Linux : 6.12
#CPU : 12 (6 cores)
CPU : Intel(R) Core(TM) i7-9850H CPU @ 2.60GHz
Turbo boost : off
|