start_time=systime(2)
while i lt num_rows do begin
; ggi = GG[*,i*num_cols:(i+1)*num_cols-1]
gpuSubArr,GG_gpu,[0L,-1L],$
[i*num_cols,(i+1)*num_cols-1],$
ggi_gpu,[0L,-1L],[0L,-1L]
; KK = transpose(G2)##(intarr(num_cols)+1)
gpuMatrix_Multiply,onesnc_gpu,$
G2_gpu,KK_gpu,/btranspose
; KK = KK + transpose(intarr(m)+1)##$
; GG2[i*num_cols:(i+1)*num_cols-1]
gpuSubArr,GG2_gpu,[i*num_cols,(i+1)*num_cols-1],$
gg2i_gpu,[0L,-1L]
gpuMatrix_Multiply,gg2i_gpu,onesm_gpu,res_gpu,$
/btranspose
gpuAdd,KK_gpu,res_gpu,KK_gpu
; KK = KK - 2*G##transpose(ggi)
gpuMatrix_Multiply,ggi_gpu,G_gpu,res_gpu,$
/atranspose
gpuAdd,1.0,KK_gpu,-2.0,res_gpu,0.0,KK_gpu
; KK = exp(-gma*KK)
gpuExp,1.0,-gma,KK_gpu,0.0,0.0,KK_gpu
; image[*,i,*] = alpha[*,0:npc-1]##KK
gpuMatrix_Multiply,KK_gpu,alpha_gpu,res1_gpu
gpuGetArr,res1_gpu,res
image[*,i,*]=res
i++
endwhile
print,'elapsed time: ',systime(2)-start_time
Here is the time taken to project a 1000x1000x6 image on the GPU
elapsed time: 12.093000
and on the CPU
elapsed time: 197.39100
for a speedup of 16. Not bad.

0 comments:
Post a Comment