-
Notifications
You must be signed in to change notification settings - Fork 59
/
cpp-bench-register.cpp
173 lines (139 loc) · 7.85 KB
/
cpp-bench-register.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/*
* cpp-bench-register.cpp
*
* The registration for the benchmarks written in C++.
*
* This is in a separate file so that the method calls don't get inlined and removed - in the C++
* methods, you should return a value that depends on the interesting work you do in your benchmark
* method, which will prevent the function calculation itself from being optimized away.
*/
#include "benchmark.hpp"
#include "cpp-benches.hpp"
/*
 * X-macro listing the division benchmark variants. f(suffix, text) is invoked once per entry:
 * `suffix` is a bare token (DIV_REG pastes it onto function names and stringifies it) and `text`
 * is a string literal used in the human-readable description.
 *
 * The final entry deliberately has no trailing backslash, so the macro ends here regardless of
 * what follows it in the file.
 */
#define DIV_REG_X(f) \
    f( 32_64, " 32b / 64b") \
    f( 64_64, " 64b / 64b") \
    f(128_64, "128b / 64b")
/*
 * Registers the inline latency and throughput division benchmarks for one DIV_REG_X entry.
 * Expands to statements that use a variable named `maker`, which must be in scope at the
 * expansion site. The noinline variants are kept, commented out, next to their inline twins.
 *
 * The last line carries no trailing backslash, so the macro cannot absorb the line after it.
 */
#define DIV_REG(suffix, text) \
    maker.template make<div_lat_inline ##suffix> ("div"#suffix"-lat", "Dependent " text " inline divisions", 1); \
    /*maker.template make<div_lat_noinline ##suffix >("div"#suffix"-lat-ni", "Dependent " text " noinline divisions", 1); */\
    maker.template make<div_tput_inline ##suffix >("div"#suffix"-tput", "Independent " text " inline divisions", 1); \
    /*maker.template make<div_tput_noinline##suffix> ("div"#suffix"-tput-ni","Independent " text " noinline divisions", 1); */
/*
 * Registers the store benchmarks implemented by F on the given maker.
 *
 * F         - benchmark function to register
 * maker     - maker that receives the registrations
 * data_size - store width in bytes; used to build the names/descriptions and to skip the
 *             cache-line-split variant when it is 1
 *
 * Registered, in order:
 *   - same-location stores
 *   - same-location stores starting 63 bytes into the buffer (skipped for byte stores)
 *   - strided stores for every stride / region-size combination listed below
 */
template <bench2_f F, typename M>
void make_strided_stores(M& maker, size_t data_size) {
    const size_t data_bits = data_size * 8;
    const std::string prefix = std::to_string(data_bits);

    // NOTE(review): aligned_ptr(64, 1024) presumably yields a 64-byte aligned buffer of at
    // least 1024 bytes - confirm against its declaration
    mem_args samelocargs{(char *)aligned_ptr(64, 1024), 0, 0};
    mem_args crossingargs{(char *)aligned_ptr(64, 1024) + 63, 0, 0};

    // aligned same-location stores
    maker.setLoopCount(10000).
        template make<F>(prefix + "-sameloc-stores",
            string_format("%2zu bit stores to same location ", data_bits),
            1,
            [=]{ return new mem_args(samelocargs); });

    // cache line split same-location stores (except byte stores, which never cross)
    if (data_size != 1) {
        maker.setLoopCount(10000).
            template make<F>(prefix + "-sameloc-split-stores",
                string_format("%2zu bit cl split stores to same location ", data_bits),
                1,
                [=]{ return new mem_args(crossingargs); });
    }

    for (size_t stride : {1, 2, 4, 8, 16, 32, 64, 128}) {
        // the stride text does not depend on the region size, so build it once per stride
        const std::string stride_str = std::to_string(stride);

        for (size_t kib : {4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048}) {
            assert(is_pow2(kib));

            size_t region_bytes = kib * 1024;
            size_t access_per_region = region_bytes / stride;

            // the buffer is requested one KiB larger than the region; the third field is
            // region_bytes - 1 (presumably an offset mask, given the power-of-two assert above)
            mem_args args{(char *)aligned_ptr(64, (kib + 1) * 1024), stride, region_bytes - 1};

            maker.setLoopCount(10 * access_per_region).
                template make<F>(prefix + "-strided-" + stride_str + "-stores-" + std::to_string(kib) + "-kib",
                    string_format("%2zu bit stores, stride %3zu, size %4zu KiB", data_bits, stride, kib),
                    1,
                    [=]{ return new mem_args(args); });
        }
    }
}
/*
 * Registers every benchmark written in C++ with the given group list.
 *
 * TIMER - timer type handed to DeltaMaker for all benchmarks registered here
 * list  - group list that the newly created benchmark groups are appended to
 *
 * Groups are appended in this order: "cpp", "memory/cpp/store", "memory/cpp/store-volatile",
 * "memory/cpp/store-alignment", "studies/memory/store-volatile" and "transcendental".
 *
 * The helper macros defined inside the body are #undef'd right after their single use so they
 * do not stay defined for the rest of the translation unit.
 */
template <typename TIMER>
void register_cpp(GroupList& list) {
    std::shared_ptr<BenchmarkGroup> cpp_group = std::make_shared<BenchmarkGroup>("cpp", "Tests written in C++");
    list.push_back(cpp_group);

    {
        auto maker = DeltaMaker<TIMER>(cpp_group.get()).useLoopDelta();

        // expands to one DIV_REG(...) per division variant; DIV_REG uses `maker` from this scope
        DIV_REG_X(DIV_REG)

        maker.template make<gettimeofday_bench>("gettimeofday", "gettimeofday() libc call", 1);
        maker.template make<crc8_bench>("crc8", "crc8 loop", 4096);

        maker.setLoopCount(1000).template make<sum_halves_bench>("sum-halves", "Sum 16-bit halves of array elems", 2048);
        maker.setLoopCount(1000).template make<mul_by_bench>("mul-4", "Four multiplications", 4096);
        maker.setLoopCount(1000).template make<mul_chain_bench>("mul-chain", "Chained multiplications", 4096);
        maker.setLoopCount(1000).template make<mul_chain4_bench>("mul-chain4", "Chained multiplications, 4 chains", 4096);
        maker.setLoopCount(1000).template make<add_indirect >("add-indirect", "Indirect adds from memory", 2048);
        maker.setLoopCount(1000).template make<add_indirect_shift>("add-indirect-shift", "Indirect adds from memory, tricky", 2048);
    }

    {
        // linked list tests
        auto maker = DeltaMaker<TIMER>(cpp_group.get(), 1000);
        size_t list_ops = LIST_COUNT;
        maker.template make<linkedlist_sentinel>("linkedlist-sentinel", "Linked-list w/ sentinel", list_ops);
        maker.template make<linkedlist_counter> ("linkedlist-counter", "Linked-list w/ count", list_ops);
    }

    {
        // strided store tests, one set per store width (in bytes)
        std::shared_ptr<BenchmarkGroup> group = std::make_shared<BenchmarkGroup>("memory/cpp/store", "Strided stores");
        list.push_back(group);
        auto maker = DeltaMaker<TIMER>(group.get());

        make_strided_stores<strided_stores_1byte>(maker, 1);
        make_strided_stores<strided_stores_4byte>(maker, 4);
        make_strided_stores<strided_stores_8byte>(maker, 8);
    }

    {
        // one benchmark per entry of VS_GAP_GAP_X
        std::shared_ptr<BenchmarkGroup> group = std::make_shared<BenchmarkGroup>("memory/cpp/store-volatile", "Simple stores");
        list.push_back(group);
        auto maker = DeltaMaker<TIMER>(group.get());

#define MAKE_GAP_BENCH(gap, gaptype, bits, bitstr) \
        maker.template make<GAP_FN(gap, gaptype, bits)> \
        (#bits "gap" #gap, bitstr " stores with " #gap " " #gaptype " gap", 1);

        VS_GAP_GAP_X(MAKE_GAP_BENCH)
#undef MAKE_GAP_BENCH
    }

    {
        // the store offset is handed to each benchmark as its argument by casting it to void *
        static_assert(sizeof(void *) == sizeof(size_t), "tunneling of size_t through void * isn't going to work");

        std::shared_ptr<BenchmarkGroup> group = std::make_shared<BenchmarkGroup>("memory/cpp/store-alignment", "Misaligned stores");
        list.push_back(group);
        auto maker = DeltaMaker<TIMER>(group.get());

        // the three families are registered one after another (not interleaved) so that the
        // registration order stays grouped by family
        for (size_t offset = 0; offset <= 64; offset++) {
            auto ostr = std::to_string(offset);
            maker.template make<misaligned_stores_sameloc>("sameloc-offset-" + ostr,
                "Same location stores with offset " + ostr, 1, constant((void *)offset));
        }

        for (size_t offset = 0; offset <= 64; offset++) {
            auto ostr = std::to_string(offset);
            maker.template make<misaligned_stores_rolling>("rolling-offset-" + ostr,
                "1B stride overlapping stores offset " + ostr, 1, constant((void *)offset));
        }

        for (size_t offset = 0; offset <= 64; offset++) {
            auto ostr = std::to_string(offset);
            maker.template make<misaligned_stores_twoloc>("twoloc-offset-" + ostr,
                "two misaligned stores offset " + ostr, 1, constant((void *)offset));
        }
    }

    {
        std::shared_ptr<BenchmarkGroup> group = std::make_shared<BenchmarkGroup>("studies/memory/store-volatile", "64-bit store study");
        list.push_back(group);
        auto maker = DeltaMaker<TIMER>(group.get(), 100000);

        maker.template make<volatile_stores_study>("64b", "64-bit store study", 1);

        // one benchmark per entry of ARB_OFFSET_X; arguments after (type, name) are ignored here
#define MAKE_ARB_BENCH(type, name, ...) maker.template make<arb_offset_##type##_##name> \
        ("arb_" #type "_" #name, #type " stores with gaps " #name, 1);

        ARB_OFFSET_X(MAKE_ARB_BENCH)
#undef MAKE_ARB_BENCH
    }

    {
        std::shared_ptr<BenchmarkGroup> group = std::make_shared<BenchmarkGroup>("transcendental", "Timing math.h functions");
        list.push_back(group);
        auto maker = DeltaMaker<TIMER>(group.get(), 100000);

        // throughput variants first, then the latency variants, for every TRANSCENDENTAL_X entry
#define MAKE_TRAN_BENCH(name) \
        maker.template make<transcendental_##name>(#name, #name "(double x) throughput", 1);

        TRANSCENDENTAL_X(MAKE_TRAN_BENCH);
#undef MAKE_TRAN_BENCH

#define MAKE_TRAN_BENCH_LAT(name) \
        maker.template make<transcendental_lat_##name>(#name "_latency", #name "(double x) latency", 1);

        TRANSCENDENTAL_X(MAKE_TRAN_BENCH_LAT);
#undef MAKE_TRAN_BENCH_LAT
    }
}
// Explicitly instantiates register_cpp once per entry produced by ALL_TIMERS_X (which is
// defined outside this file); each entry supplies the timer type used as the template argument.
#define REG_DEFAULT(TIMER_TYPE) \
    template void register_cpp<TIMER_TYPE>(GroupList&);

ALL_TIMERS_X(REG_DEFAULT)