-
Notifications
You must be signed in to change notification settings - Fork 1
/
Slides-part2_scalarization.tex
72 lines (53 loc) · 2.38 KB
/
Slides-part2_scalarization.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
\talksection{Scalarization Stage}
\begin{frame}[fragile]{Scalarization Overview}
\begin{itemize}
\item Eliminates vector operations from the source function
\item Vector types used are likely to be narrower than the native SIMD width
\item To be combined with packetization
\begin{itemize}
\item Generate vector instructions with the native SIMD width
%\item Implicitly performs 'Structure-of-Arrays to Array-of-Structures' conversion
\end{itemize}
\item On its own, does not change the behaviour of the code
%\item Alternative is instantiating vector instructions (and users) $N$ times
\end{itemize}
%\vspace{1.5ex}
\vfill
\hspace{1em}\includegraphics[scale=0.55]{images/stages-scalar.pdf}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{Scalarization Example}
\begin{itemize}
\item Example: Extract audio samples from left and right channels, scale by 2
\item Scalarizing $n$-element loads and stores introduces a stride of $n$
\begin{itemize}
\item Results in interleaved loads and stores after packetization
\end{itemize}
\end{itemize}
\begin{minipage}[t]{0.45\linewidth}
\vspace{0.1ex}
Before Scalarization (fragment):
\begin{codebox}[commandchars=\\\[\]]
int2 \uniform[*src], int \uniform[*left], int \uniform[*right];
int \varying[tid] = \varying[get_global_id(0)];
int2 \varying[sample] = \uniform[src]\idx[\varying[tid]];
\uniform[left]\idx[\varying[tid]] = (\varying[sample.x] << \uniform[1]);
\uniform[right]\idx[\varying[tid]] = (\varying[sample.y] << \uniform[1]);
\end{codebox}
\end{minipage}
\hspace{1em}
\begin{minipage}[t]{0.49\linewidth}
\vspace{0.1ex}
After Scalarization (reconstructed):
\begin{codebox}[commandchars=\\\[\]]
int2 \uniform[*src], int \uniform[*left], int \uniform[*right];
int \varying[tid] = \varying[get_global_id(0)];
int \uniform[*srcScalar] = ((int *)\uniform[src]);
int \varying[sampleLeft] = \uniform[srcScalar]\idx[(\varying[tid] * \uniform[2]) + \uniform[0]];
int \varying[sampleRight] = \uniform[srcScalar]\idx[(\varying[tid] * \uniform[2]) + \uniform[1]];
\uniform[left]\idx[\varying[tid]] = (\varying[sampleLeft] << \uniform[1]);
\uniform[right]\idx[\varying[tid]] = (\varying[sampleRight] << \uniform[1]);
\end{codebox}
%int \varying[sampleLeft] = ((int *)&\uniform[src]\idx[\varying[tid]])\idx[\uniform[0]];
\end{minipage}
\end{frame}