2010-07-19 186 views
1

我试图添加两个4800x9600矩阵,但我遇上了困难......CUDA基本矩阵加 - 大型矩阵

这是一个简单的C = A + B操作...

这里内核:

__global__ void matAdd_kernel(float* result,float* A,float* B,int size) 
{ 
     int x=blockIdx.x*blockDim.x+threadIdx.x; 
     int y=blockIdx.y*blockDim.y+threadIdx.y; 
     int idx=x*y+x; 

     if(idx<size) 
     { 
       result[idx]=A[idx]+B[idx]; 
     } 
} 

这里是代码:

void matAdd(Matrix C,Matrix A,Matrix B) 
{ 
     int N=A.w*A.h; 
     dim3 dimBlock=dim3(22,22); 
     int yBlocks=A.w/dimBlock.y+((A.w%dimBlock.y)==0?0:1); //yBlocks is 219 for dimBlock(22,22) and 9600x4800 
     int xBlocks=A.h/dimBlock.x+((A.h%dimBlock.x)==0?0:1); //xBlocks is 437 for dimBlock(22,22) and 9600x4800 
     dim3 dimGrid=dim3(xBlocks,yBlocks); 
     matAdd_kernel<<<dimGrid,dimBlock>>>(C.data,A.data,B.data,N); 
     cutilCheckMsg("kernel launch failure"); 
} 

我使用的是矩阵如下:

typedef struct{ 
     int w; 
     int h; 
     float* data; 
}Matrix; 

这里是当我打印了前10行和底部10行输出:

top-10 rows A: 
-0.023930 0.047744 -0.074694 0.053555 -0.032298 0.038762 -0.068890 0.088894 -0.044989 0.005679 -0.054846 0.064743 
-0.070026 0.059445 -0.078712 0.001957 -0.050910 0.067603 -0.089646 0.076562 -0.039840 0.052980 -0.074809 0.037390 
-0.042785 0.087303 -0.005369 0.017769 -0.075572 0.075981 -0.064457 0.067737 -0.045192 0.046887 -0.030999 0.006888 
-0.040708 0.020566 -0.089926 0.082820 -0.010478 0.021086 -0.086581 0.095966 -0.054339 0.068906 -0.060855 0.087460 
-0.059717 0.038708 -0.026613 0.053984 -0.088490 0.066764 -0.005617 0.091969 -0.018239 0.097972 -0.073692 0.010064 
-0.052374 0.048555 -0.037706 0.043377 -0.071556 0.075888 -0.002523 0.037950 -0.065693 0.078094 -0.011694 0.039196 
-0.092799 0.011099 -0.056766 0.091866 -0.059577 0.029236 -0.063502 0.091717 -0.030844 0.079273 -0.087244 0.048310 
-0.089582 0.004614 -0.002560 0.058306 -0.006922 0.097391 -0.099892 0.039699 -0.036129 0.038520 -0.084387 0.012408 
-0.054143 0.048351 -0.006309 0.002902 -0.073858 0.012903 -0.089030 0.041077 -0.034445 0.030259 -0.071056 0.002762 
-0.048605 0.047165 -0.082960 0.096326 -0.066084 0.029297 -0.070599 0.034394 -0.044475 0.075287 -0.063274 0.023137 
-0.033560 0.030527 -0.019907 0.078961 -0.052821 0.088959 -0.0.061808 -0.020862 0.058320 -0.028586 0.079149 
-0.087878 0.034127 -0.040097 0.092205 -0.033817 0.099641 -0.002590 0.012473 -0.050764 0.093213 -0.065811 0.075233 

bottom-10 rows A: 
0.006791 -0.015538 0.072625 -0.049761 0.029860 -0.093147 0.023192 -0.031281 0.030545 -0.068470 0.020244 
0.014509 -0.057081 0.049269 -0.047556 0.022443 -0.092672 0.065184 -0.030968 0.097352 -0.052493 0.062981 
0.004188 -0.028991 0.063084 -0.082578 0.005537 -0.030271 0.038801 -0.043018 0.066686 -0.004677 0.054946 
0.013995 -0.011381 0.075888 -0.069206 0.012784 -0.009126 0.068735 -0.066544 0.070738 -0.055201 0.097867 
0.083719 -0.007838 0.018854 -0.098974 0.023769 -0.044483 0.028541 -0.032198 0.047691 -0.005788 0.039455 
0.066290 -0.033136 0.097825 -0.051469 0.012732 -0.038881 0.076786 -0.069891 0.084848 -0.050189 0.017055 
0.077407 -0.088394 0.006851 -0.047383 0.081140 -0.094065 0.002880 -0.072353 0.095627 -0.096577 0.025683 
0.023140 -0.008283 0.096901 -0.011595 0.031076 -0.079637 0.050198 -0.014112 0.027430 -0.012270 0.054234 
0.011981 -0.053835 0.076015 -0.062570 0.082806 -0.040616 0.030618 -0.003141 0.031599 -0.093869 0.048415 
0.065879 -0.060177 0.085832 -0.000699 0.038540 -0.014198 0.018127 -0.013525 0.094031 -0.072898 0.083781 
0.056596 -0.090405 0.092818 -0.013577 0.078385 -0.061543 0.053441 -0.092938 0.032074 -0.017903 0.051810 

top-10 rows B: 
-0.023930 0.047744 -0.074694 0.053555 -0.032298 0.038762 -0.068890 0.088894 -0.044989 0.005679 -0.054846 0.064743 
-0.070026 0.059445 -0.078712 0.001957 -0.050910 0.067603 -0.089646 0.076562 -0.039840 0.052980 -0.074809 0.037390 
-0.042785 0.087303 -0.005369 0.017769 -0.075572 0.075981 -0.064457 0.067737 -0.045192 0.046887 -0.030999 0.006888 
-0.040708 0.020566 -0.089926 0.082820 -0.010478 0.021086 -0.086581 0.095966 -0.054339 0.068906 -0.060855 0.087460 
-0.059717 0.038708 -0.026613 0.053984 -0.088490 0.066764 -0.005617 0.091969 -0.018239 0.097972 -0.073692 0.010064 
-0.052374 0.048555 -0.037706 0.043377 -0.071556 0.075888 -0.002523 0.037950 -0.065693 0.078094 -0.011694 0.039196 
-0.092799 0.011099 -0.056766 0.091866 -0.059577 0.029236 -0.063502 0.091717 -0.030844 0.079273 -0.087244 0.048310 
-0.089582 0.004614 -0.002560 0.058306 -0.006922 0.097391 -0.099892 0.039699 -0.036129 0.038520 -0.084387 0.012408 
-0.054143 0.048351 -0.006309 0.002902 -0.073858 0.012903 -0.089030 0.041077 -0.034445 0.030259 -0.071056 0.002762 
-0.048605 0.047165 -0.082960 0.096326 -0.066084 0.029297 -0.070599 0.034394 -0.044475 0.075287 -0.063274 0.023137 
-0.033560 0.030527 -0.019907 0.078961 -0.052821 0.088959 -0.0.061808 -0.020862 0.058320 -0.028586 0.079149 
-0.087878 0.034127 -0.040097 0.092205 -0.033817 0.099641 -0.002590 0.012473 -0.050764 0.093213 -0.065811 0.075233 

bottom-10 rows B: 
0.006791 -0.015538 0.072625 -0.049761 0.029860 -0.093147 0.023192 -0.031281 0.030545 -0.068470 0.020244 
0.014509 -0.057081 0.049269 -0.047556 0.022443 -0.092672 0.065184 -0.030968 0.097352 -0.052493 0.062981 
0.004188 -0.028991 0.063084 -0.082578 0.005537 -0.030271 0.038801 -0.043018 0.066686 -0.004677 0.054946 
0.013995 -0.011381 0.075888 -0.069206 0.012784 -0.009126 0.068735 -0.066544 0.070738 -0.055201 0.097867 
0.083719 -0.007838 0.018854 -0.098974 0.023769 -0.044483 0.028541 -0.032198 0.047691 -0.005788 0.039455 
0.066290 -0.033136 0.097825 -0.051469 0.012732 -0.038881 0.076786 -0.069891 0.084848 -0.050189 0.017055 
0.077407 -0.088394 0.006851 -0.047383 0.081140 -0.094065 0.002880 -0.072353 0.095627 -0.096577 0.025683 
0.023140 -0.008283 0.096901 -0.011595 0.031076 -0.079637 0.050198 -0.014112 0.027430 -0.012270 0.054234 
0.011981 -0.053835 0.076015 -0.062570 0.082806 -0.040616 0.030618 -0.003141 0.031599 -0.093869 0.048415 
0.065879 -0.060177 0.085832 -0.000699 0.038540 -0.014198 0.018127 -0.013525 0.094031 -0.072898 0.083781 
0.056596 -0.090405 0.092818 -0.013577 0.078385 -0.061543 0.053441 -0.092938 0.032074 -0.017903 0.051810 

top-10 rows C: 
-0.047860 0.095488 -0.149388 0.107110 -0.064596 0.077524 -0.137780 0.177788 -0.089978 0.011358 -0.109692 0.129486 
-0.140052 0.118890 -0.157424 0.003913 -0.101820 0.135206 -0.179292 0.153124 -0.079680 0.105960 -0.149618 0.074780 
-0.085570 0.174606 -0.010738 0.035538 -0.151144 0.151962 -0.128914 0.135474 -0.090384 0.093774 -0.061998 0.013776 
-0.081416 0.000000 -0.179852 0.165640 -0.020956 0.042172 -0.173162 0.000000 -0.108678 0.137812 -0.121710 0.000000 
-0.119434 0.077416 -0.053226 0.107968 -0.176980 0.133528 -0.011234 0.000000 -0.036478 0.195944 -0.147384 0.000000 
-0.104748 0.000000 -0.075412 0.086754 -0.143112 0.151776 -0.005047 0.000000 -0.131386 0.156188 -0.023388 0.078392 
-0.185598 0.022198 0.000000 0.183732 -0.119154 0.058472 -0.127004 0.000000 -0.061688 0.158546 -0.174488 0.096620 
-0.179164 0.000000 -0.005121 0.116612 -0.013844 0.194782 -0.199784 0.079398 -0.072258 0.077040 -0.168774 0.024816 
-0.108286 0.096702 -0.012618 0.005803 -0.147716 0.025806 -0.178060 0.082154 -0.068890 0.060518 -0.142112 0.005524 
-0.097210 0.000000 0.000000 0.000000 -0.132168 0.058594 -0.141198 0.000000 -0.088950 0.150574 -0.126548 0.046274 
-0.067120 0.061054 0.000000 0.000000 -0.105642 0.177918 -0.086420 0.123616 -0.041724 0.116640 -0.057172 0.158298 
-0.175756 0.068254 -0.080194 0.184410 -0.067634 0.199282 -0.005179 0.000000 -0.101528 0.186426 -0.131622 0.150466 

bottom-10 rows C: 
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 
0.029018 0.000000 0.000000 0.000000 0.000000 0.000000 0.130368 0.000000 0.000000 0.000000 0.000000 
0.000000 -0.057982 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 
0.000000 0.000000 0.151776 0.000000 0.000000 0.000000 0.000000 -0.133088 0.000000 0.000000 0.000000 
0.000000 0.000000 0.000000 -0.197948 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 
0.000000 0.000000 0.000000 0.000000 0.025464 0.000000 0.000000 0.000000 0.169696 0.000000 0.000000 
0.000000 0.000000 0.000000 0.000000 0.000000 -0.188130 0.000000 0.000000 0.000000 0.000000 0.000000 
0.000000 0.000000 0.193802 0.000000 0.000000 0.000000 0.100396 0.000000 0.000000 -0.024540 0.000000 
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -0.006282 0.000000 0.000000 0.000000 
0.000000 -0.120354 0.000000 0.000000 0.000000 -0.028396 0.000000 0.000000 0.188062 0.000000 0.167562 
0.000000 0.000000 0.000000 -0.027154 0.000000 0.000000 0.000000 0.000000 0.000000 -0.035806 0.000000 

正如你可以看到,C = A + B只是不工作..

对于矩阵的顶部部分是可以的,但在下半部分干草线。我不知道所有这些零来自哪里...

我做错了什么?

任何有识之士非常感谢。许多在此先感谢,

回答

6

代码的那支给我看的部分最重要的是:

int idx=x*y+x; 

我相信你的意思是:

int idx=maxX*y + x; 

否则,如果x=0y=1000 ,然后idx=0

这也可以解释为什么它适用于像y=0这样的一些而不适用于其他人。

您需要传递矩阵的最大X值。

希望有帮助!

+0

我不认为这是矩阵的大小 - “x”和“y”看起来像是计算线程的地址。但那将是错误。 – Rup 2010-07-20 09:35:37

+0

如何? 'X'和'Y'是计算线程的地址(这是他想要执行计算的矩阵中的位置)。他只是将'X,Y'坐标转换为'float'数组的'idx'。 – KLee1 2010-07-20 15:43:19