2013-09-30 30 views
1

我遇到了一个问题:如何高效且正确地把带晕区(halo)的数组从全局内存加载到共享内存。

问题如下:我的全局内存中有一个大小为 (256, 64) 的大数组,我想把它按 [16][16] 的分块加载到共享内存中。在计算中,我还需要用到相邻单元的值(晕区,halo)。

我最终写出了一段线程分支发散(divergent)非常严重的代码,因此运行很慢,而且最后结果也不正确。下面是我的做法,非常感谢任何建议。

! Load one tile of g_data into shared memory together with a one-cell halo.
! The first index uses a zero boundary (halo set to 0 outside the global
! array); the second index wraps around periodically via d_l0 / d_l1.
!
! NOTE(review): the tile bounds are hard-coded, so this assumes
! blockDim%x == blockDim%y == 16 -- TODO confirm against the launch config.
! NOTE(review): g_data is indexed with zero-based d_j / d_l, so it is
! presumably declared with lower bound 0 in both dimensions -- confirm.
! NOTE(review): bx and d_lmax are not defined in this fragment; bx is
! presumably the zero-based block index in x (blockIdx%x - 1) and d_lmax the
! extent of the second dimension -- confirm.
real, shared :: s_data(-1:16,-1:16)

! Zero-based global indices of the cell owned by this thread.
d_j = (blockIdx%x-1) * blockDim%x + threadIdx%x-1
d_l = (blockIdx%y-1) * blockDim%y + threadIdx%y-1

! Zero-based thread indices inside the tile.
tIdx = threadIdx%x - 1
tIdy = threadIdx%y - 1

! Number of blocks along each direction of the 256 x 64 global array.
bdimx = 256/(blockDim%x)
bdimy = 64/(blockDim%y)

! Periodic neighbours in the second index.
d_l1 = d_l + 1
if (d_l1 == d_lmax) d_l1 = 0

d_l0 = d_l - 1
if (d_l == 0) d_l0 = d_lmax - 1

! NOTE(review): barrier before the tile is overwritten; only required if
! earlier code in the same kernel still reads s_data -- confirm.
call syncthreads()

! Load the interior cell owned by this thread.
s_data(tIdx,tIdy) = g_data(d_j,d_l)

! Left halo column (-1, tIdy).
if (tIdx == 0) then
    if (bx == 0) then
        s_data(tIdx-1,tIdy) = 0
    else
        s_data(tIdx-1,tIdy) = g_data(d_j-1,d_l)
    end if
end if

! Right halo column (blockDim%x, tIdy).
if (tIdx == blockDim%x-1) then
    if (bx == bdimx-1) then
        s_data(tIdx+1,tIdy) = 0
    else
        s_data(tIdx+1,tIdy) = g_data(d_j+1,d_l)
    end if
end if

! Lower halo row (tIdx, -1): written by the first row of threads, using the
! periodic lower neighbour d_l0.
if (tIdy == 0) then
    s_data(tIdx,tIdy-1) = g_data(d_j,d_l0)
end if

! Upper halo row (tIdx, blockDim%y): written by the last row of threads,
! using the periodic upper neighbour d_l1.
if (tIdy == blockDim%y-1) then
    s_data(tIdx,tIdy+1) = g_data(d_j,d_l1)
end if

! Left corners (-1, -1) and (-1, blockDim%y).
if (tIdx == 0) then
    if (bx == 0) then
        if (tIdy == 0) then
            s_data(tIdx-1,tIdy-1) = 0
        end if
        if (tIdy == blockDim%y-1) then
            s_data(tIdx-1,tIdy+1) = 0
        end if
    else
        if (tIdy == 0) then
            s_data(tIdx-1,tIdy-1) = g_data(d_j-1,d_l0)
        end if
        ! tIdy never reaches blockDim%y, so the last thread row is
        ! blockDim%y-1.
        if (tIdy == blockDim%y-1) then
            s_data(tIdx-1,tIdy+1) = g_data(d_j-1,d_l1)
        end if
    end if
end if

! Right corners (blockDim%x, -1) and (blockDim%x, blockDim%y).
if (tIdx == blockDim%x-1) then
    if (bx == bdimx-1) then
        if (tIdy == 0) then
            s_data(tIdx+1,tIdy-1) = 0
        end if
        if (tIdy == blockDim%y-1) then
            s_data(tIdx+1,tIdy+1) = 0
        end if
    else
        if (tIdy == 0) then
            s_data(tIdx+1,tIdy-1) = g_data(d_j+1,d_l0)
        end if
        if (tIdy == blockDim%y-1) then
            s_data(tIdx+1,tIdy+1) = g_data(d_j+1,d_l1)
        end if
    end if
end if

! Every thread must see the complete tile (interior + halo) before any
! thread reads a neighbouring cell written by another thread.
call syncthreads()

! Do some computation with s_data

回答