forked from jmrosinski/GPTL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
getoverhead.c
277 lines (254 loc) · 9.04 KB
/
getoverhead.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#include <stdio.h>
#include <string.h>
#include "private.h"
/* File-local kernels, defined below, that GPTLget_overhead() times in 1000-iteration loops */
static int gptlstart_sim (char *, int);
static Timer *getentry_instr_sim (const Hashentry *,void *, unsigned int *, const int);
static void misc_sim (Nofalse *, Timer ***, int);
/* Flags tested by misc_sim(); with these initial values its corresponding messages do not print */
static bool initialized = true;
static bool disabled = false;
/*
** All routines in this file are non-public
*/
/*
** ohd_percent: express one overhead component as a percentage of the total
**
** Input args:
**   part:  overhead of a single component
**   total: sum of all components
**
** Return value: part/total*100, or 0 when total is exactly zero. The zero case
**               arises when the underlying timer is too coarse to register any
**               cost; without the guard the report would contain NaN or Inf.
*/
static double ohd_percent (double part, double total)
{
  return (total != 0.) ? part / total * 100. : 0.;
}

/*
** GPTLget_overhead: estimate the per-call cost of the pieces that make up a
**   start/stop call by timing 1000 iterations of each piece, print the breakdown
**   to fp, and hand back two summary estimates.
**
** Input args:
**   fp:             Stream to write the report to
**   ptr2wtimefunc:  Underlying timing routine
**   getentry:       From gptl.c, finds the entry in the hash table
**   genhashidx:     From gptl.c, generates the hash index
**   get_thread_num: From gptl.c, gets the thread number
**   stackidx:       Call stack indices, forwarded to misc_sim() (thread index 0 is used)
**   callstack:      Call stacks, forwarded to misc_sim() (thread index 0 is used)
**   hashtable:      hashtable for thread 0
**   tablesize:      size of hashtable
**   dousepapi:      whether or not PAPI is enabled (only consulted when HAVE_PAPI is defined)
**   imperfect_nest: non-zero => skip the misc start/stop measurement and report it as 0
**
** Output args:
**   self_ohd:       Estimate of GPTL-induced overhead in the timer itself (included in "Wallclock")
**   parent_ohd:     Estimate of GPTL-induced overhead for the timer which appears in its parents
**
** Return value: 0 always
*/
int GPTLget_overhead (FILE *fp,
                      double (*ptr2wtimefunc)(void),
                      Timer *getentry (const Hashentry *, const char *, unsigned int),
                      unsigned int genhashidx (const char *),
                      int get_thread_num (void),
                      Nofalse *stackidx,
                      Timer ***callstack,
                      const Hashentry *hashtable,
                      const int tablesize,
                      bool dousepapi,
                      int imperfect_nest,
                      double *self_ohd,
                      double *parent_ohd)
{
  double t1, t2;              /* Initial, final timer values */
  double ftn_ohd;             /* Fortran-callable layer */
  double get_thread_num_ohd;  /* Getting my thread index */
  double genhashidx_ohd;      /* Generating hash index */
  double getentry_ohd;        /* Finding entry in hash table */
  double utr_ohd;             /* Underlying timing routine */
  double papi_ohd;            /* Reading PAPI counters */
  double total_ohd;           /* Sum of overheads */
  double getentry_instr_ohd;  /* Finding entry in hash table for auto-instrumented calls */
  double misc_ohd;            /* misc. calcs within start/stop */
  int i, n;
  int ret;
  int mythread;               /* which thread are we */
  unsigned int hashidx;       /* Hash index */
  int randomvar;              /* placeholder for taking the address of a variable */
  Timer *entry;               /* placeholder for return from "getentry()" */
  bool found = false;         /* true once a usable hash table entry has been timed */
  static const char *thisfunc = "GPTLget_overhead";

  /*
  ** Gather timings by running kernels 1000 times each.
  ** First: Fortran wrapper overhead
  */
  t1 = (*ptr2wtimefunc)();
#pragma unroll(10)
  for (i = 0; i < 1000; ++i) {
    ret = gptlstart_sim ("timername", strlen ("timername"));
  }
  t2 = (*ptr2wtimefunc)();
  ftn_ohd = 0.001 * (t2 - t1);

  /* get_thread_num() overhead */
  t1 = (*ptr2wtimefunc)();
#pragma unroll(10)
  for (i = 0; i < 1000; ++i) {
    mythread = get_thread_num ();
  }
  t2 = (*ptr2wtimefunc)();
  get_thread_num_ohd = 0.001 * (t2 - t1);

  /* genhashidx overhead */
  t1 = (*ptr2wtimefunc)();
#pragma unroll(10)
  for (i = 0; i < 1000; ++i) {
    hashidx = genhashidx ("timername");
  }
  t2 = (*ptr2wtimefunc)();
  genhashidx_ohd = 0.001 * (t2 - t1);

  /*
  ** getentry overhead
  ** Find the first hashtable entry with a valid name. Start at 1 because 0 is not a valid hash
  */
  getentry_ohd = 0.;
  for (n = 1; n < tablesize; ++n) {
    if (hashtable[n].nument > 0 && strlen (hashtable[n].entries[0]->name) > 0) {
      hashidx = genhashidx (hashtable[n].entries[0]->name);
      t1 = (*ptr2wtimefunc)();
      for (i = 0; i < 1000; ++i)
        entry = getentry (hashtable, hashtable[n].entries[0]->name, hashidx);
      t2 = (*ptr2wtimefunc)();
      getentry_ohd = 0.001 * (t2 - t1);
      fprintf (fp, "%s: using hash entry %d=%s for getentry estimate\n",
               thisfunc, n, hashtable[n].entries[0]->name);
      found = true;
      break;
    }
  }

  /*
  ** An explicit flag replaces the former "n == tablesize" test: that test was false
  ** for tablesize < 1, in which case the stale t1/t2 from the genhashidx measurement
  ** were silently reported as the getentry cost.
  */
  if ( ! found) {
    if (tablesize > 0) {
      /* hashidx still holds the index of "timername" from the genhashidx loop above */
      fprintf (fp, "%s: hash table empty: Using alternate means to find getentry time\n", thisfunc);
      t1 = (*ptr2wtimefunc)();
      for (i = 0; i < 1000; ++i)
        entry = getentry (hashtable, "timername", hashidx);
      t2 = (*ptr2wtimefunc)();
      getentry_ohd = 0.001 * (t2 - t1);
    } else {
      /* No slots to probe: do not touch the table at all */
      fprintf (fp, "%s: hash table has no slots: setting getentry overhead to 0\n", thisfunc);
    }
  }

  /* utr overhead */
  t1 = (*ptr2wtimefunc)();
#pragma unroll(10)
  for (i = 0; i < 1000; ++i) {
    t2 = (*ptr2wtimefunc)();
  }
  utr_ohd = 0.001 * (t2 - t1);

  /* PAPI overhead */
#ifdef HAVE_PAPI
  if (dousepapi) {
    t1 = (*ptr2wtimefunc)();
    read_counters1000 ();
    t2 = (*ptr2wtimefunc)();
  } else {
    t1 = 0.;
    t2 = 0.;
  }
  papi_ohd = 0.001 * (t2 - t1);
#else
  papi_ohd = 0.;
#endif

  /*
  ** getentry_instr overhead
  ** getentry_instr_sim() reduces the address modulo tablesize, so it must not be
  ** called with an empty table (modulo by zero is undefined behavior)
  */
  if (tablesize > 0) {
    t1 = (*ptr2wtimefunc)();
#pragma unroll(10)
    for (i = 0; i < 1000; ++i) {
      entry = getentry_instr_sim (hashtable, &randomvar, &hashidx, tablesize);
    }
    t2 = (*ptr2wtimefunc)();
    getentry_instr_ohd = 0.001 * (t2 - t1);
  } else {
    getentry_instr_ohd = 0.;
  }

  /* misc start/stop overhead */
  if (imperfect_nest) {
    fprintf (fp, "Imperfect nesting detected: setting misc_ohd=0\n");
    misc_ohd = 0.;
  } else {
    t1 = (*ptr2wtimefunc)();
#pragma unroll(10)
    for (i = 0; i < 1000; ++i) {
      misc_sim (stackidx, callstack, 0);
    }
    t2 = (*ptr2wtimefunc)();
    misc_ohd = 0.001 * (t2 - t1);
  }

  /* getentry_instr_ohd is reported separately below and is not part of the total */
  total_ohd = ftn_ohd + get_thread_num_ohd + genhashidx_ohd + getentry_ohd +
              utr_ohd + misc_ohd + papi_ohd;

  fprintf (fp, "Total overhead of 1 GPTL start or GPTLstop call=%g seconds\n", total_ohd);
  fprintf (fp, "Components are as follows:\n");
  fprintf (fp, "Fortran layer: %7.1e = %5.1f%% of total\n",
           ftn_ohd, ohd_percent (ftn_ohd, total_ohd));
  fprintf (fp, "Get thread number: %7.1e = %5.1f%% of total\n",
           get_thread_num_ohd, ohd_percent (get_thread_num_ohd, total_ohd));
  fprintf (fp, "Generate hash index: %7.1e = %5.1f%% of total\n",
           genhashidx_ohd, ohd_percent (genhashidx_ohd, total_ohd));
  fprintf (fp, "Find hashtable entry: %7.1e = %5.1f%% of total\n",
           getentry_ohd, ohd_percent (getentry_ohd, total_ohd));
  fprintf (fp, "Underlying timing routine: %7.1e = %5.1f%% of total\n",
           utr_ohd, ohd_percent (utr_ohd, total_ohd));
  fprintf (fp, "Misc start/stop functions: %7.1e = %5.1f%% of total\n",
           misc_ohd, ohd_percent (misc_ohd, total_ohd));
#ifdef HAVE_PAPI
  if (dousepapi) {
    fprintf (fp, "Read PAPI counters: %7.1e = %5.1f%% of total\n",
             papi_ohd, ohd_percent (papi_ohd, total_ohd));
  }
#endif
  fprintf (fp, "\n");
  fprintf (fp, "NOTE: If GPTL is called from C not Fortran, the 'Fortran layer' overhead is zero\n");
  fprintf (fp, "NOTE: For calls to GPTLstart_handle()/GPTLstop_handle(), the 'Generate hash index' overhead is zero\n");
  fprintf (fp, "NOTE: For auto-instrumented calls, the cost of generating the hash index plus finding\n"
           " the hashtable entry is %7.1e not the %7.1e portion taken by GPTLstart\n",
           getentry_instr_ohd, genhashidx_ohd + getentry_ohd);
  fprintf (fp, "NOTE: Each hash collision roughly doubles the 'Find hashtable entry' cost of that timer\n");

  *self_ohd = ftn_ohd + utr_ohd; /* In GPTLstop() ftn wrapper is called before utr */
  *parent_ohd = ftn_ohd + utr_ohd + misc_ohd +
                2.*(get_thread_num_ohd + genhashidx_ohd + getentry_ohd + papi_ohd);
  return 0;
}
/*
** gptlstart_sim: Stand-in for the Fortran wrapper layer "gptlstart()", used to time its cost
**
** Copies at most MAX_CHARS characters of the incoming name into a local buffer and
** NUL-terminates the copy. The copy is not used afterwards.
**
** Input args:
**   name: timer name
**   nc1:  number of characters in "name"
**
** Return value: 0 always
*/
static int gptlstart_sim (char *name, int nc1)
{
  char localname[MAX_CHARS+1];           /* room for MAX_CHARS characters plus the terminator */
  const int len = MIN (nc1, MAX_CHARS);  /* number of characters to copy, clamped to the buffer */

  strncpy (localname, name, len);
  localname[len] = '\0';
  return 0;
}
/*
** getentry_instr_sim: Simulate the cost of getentry_instr(), which is invoked only when
**                     auto-instrumentation is enabled on non-AIX platforms
**
** Input args:
**   hashtable: hashtable for thread 0
**   self:      address of function
**   tablesize: size of hashtable
**
** Output args:
**   indx:      hashtable index derived from "self"
**
** Return value: first entry in the selected slot if its address equals "self", else 0
*/
static Timer *getentry_instr_sim (const Hashentry *hashtable,
                                  void *self,
                                  unsigned int *indx,
                                  const int tablesize)
{
  const Hashentry *slot;  /* hash table slot selected by the computed index */

  /* Discard the low 4 bits of the address, then reduce modulo the table size */
  *indx = (((unsigned long) self) >> 4) % tablesize;
  slot = &hashtable[*indx];

  /* Only the first entry of the slot is examined */
  if (slot->nument > 0 && slot->entries[0]->address == self)
    return slot->entries[0];

  return 0;
}
/*
** misc_sim: Simulate the cost of miscellaneous computations in start/stop
**
** Performs a series of flag tests, one call stack lookup, and a decrement followed
** by an increment of the stack index, so the index is unchanged on return.
**
** Input args:
**   stackidx:  stack index
**   callstack: call stack
**   t:         thread index
*/
static void misc_sim (Nofalse *stackidx, Timer ***callstack, int t)
{
  static Timer *nulltimer = 0;               /* compared against the current stack entry */
  static const char *thisfunc = "misc_sim";
  Nofalse *depth = &stackidx[t];             /* stack index for thread t */
  Timer *top;                                /* entry at the current stack index */

  if (disabled)
    printf ("GPTL: %s: should never print disabled\n", thisfunc);

  if (! initialized)
    printf ("GPTL: %s: should never print ! initialized\n", thisfunc);

  top = callstack[t][depth->val];
  if (nulltimer == top)
    printf ("GPTL: %s: should never print ptr=bptr\n", thisfunc);

  /* Pop: decrement, then range-check the new index */
  --depth->val;
  if (depth->val < -2)
    printf ("GPTL: %s: should never print stackidxt < -2\n", thisfunc);

  /* Push: increment back to the original index, then range-check it */
  ++depth->val;
  if (depth->val > MAX_STACK-1)
    printf ("GPTL: %s: should never print stackidxt > MAX_STACK-1\n", thisfunc);
}