with CUDA Fortran
with CUDA Fortran
with CUDA Fortran
Create successful ePaper yourself
Turn your PDF publications into a flip-book with our unique Google optimized e-Paper software.
Matrix Transpose - Shared Memory
!> Transpose idata (nx x ny) into odata (ny x nx), staging each tile of the
!! matrix in shared memory.
!!
!! Each thread block copies one tile of idata into the shared array `tile`,
!! waits at a barrier, and then writes the tile out with the block indices
!! swapped. In both the read from idata and the write to odata, threadIdx%x
!! indexes the first (fastest-varying) array dimension; the transposition
!! itself happens in the shared-memory read tile(threadIdx%y, threadIdx%x).
!!
!! nx, ny and TILE_DIM are not declared in this routine; they must be integer
!! constants visible from the enclosing scope.
!! NOTE(review): the indexing assumes a square launch configuration with
!! blockDim%x == blockDim%y == TILE_DIM -- confirm at the call site.
!!
!! @param odata  output matrix, odata(j,i) = idata(i,j)
!! @param idata  input matrix
attributes(global) subroutine transposeCoalesced(odata, idata)
  real, intent(out) :: odata(ny,nx)
  real, intent(in) :: idata(nx,ny)
  ! The leading extent is padded by one element so that the transposed read
  ! below (stride = leading extent across threadIdx%x) does not step through
  ! shared memory with a stride of exactly TILE_DIM. Presumably TILE_DIM is a
  ! multiple of the shared-memory bank count (e.g. 32), in which case the
  ! unpadded layout serializes that read -- TODO confirm TILE_DIM.
  real, shared :: tile(TILE_DIM+1, TILE_DIM)
  integer :: x, y

  ! Global (x,y) position of this thread's element in the input matrix.
  x = (blockIdx%x-1)*blockDim%x + threadIdx%x
  y = (blockIdx%y-1)*blockDim%y + threadIdx%y

  ! Guard the load so a grid that over-covers the matrix (nx or ny not a
  ! multiple of the block size) never reads past the end of idata.
  if (x <= nx .and. y <= ny) tile(threadIdx%x, threadIdx%y) = idata(x,y)

  ! The barrier stays outside every conditional: all threads of the block
  ! must reach it, including those that skipped the load above.
  call syncthreads()

  ! Swap the block indices (but not the thread indices) to locate the
  ! transposed tile in the output matrix.
  x = (blockIdx%y-1)*blockDim%y + threadIdx%x
  y = (blockIdx%x-1)*blockDim%x + threadIdx%y

  ! An in-range output element always corresponds to a tile entry that was
  ! loaded above, so this guard also prevents reading unset shared memory.
  if (x <= ny .and. y <= nx) odata(x,y) = tile(threadIdx%y, threadIdx%x)
end subroutine transposeCoalesced
idata<br />
tile<br />
odata