Advanced_GPU_talk
Advanced_GPU_talk
Advanced_GPU_talk
You also want an ePaper? Increase the reach of your titles
YUMPU automatically turns print PDFs into web optimized ePapers that Google loves.
Overlap Example<br />
cudaStream_t stream[8];<br />
for (int i = 0; i < 8; i++) cudaStreamCreate(&stream[i]);<br />
...<br />
cudaMallocHost(host1); cudaMalloc(device1);<br />
...<br />
cudaMemcpyAsync(host1,device1,D2H,stream[0]);<br />
cudaMemcpyAsync(device2,host2,H2D,stream[1]);<br />
for (int i = 2; i < 8; i++) {<br />
kernel_do_sg(…);<br />
}<br />
H2D Copy engine<br />
D2H Copy engine<br />
Compute engine<br />
SM1 SM2 SM3 SM4 SM5 SM6<br />
kernel kernel kernel kernel kernel kernel