2015-01-15 54 views
0

我正在尝试使用汇编语言执行两个小阵列的简单dotproduct计算。这里是我的代码:装配MMX Dotproduct分割故障

#include <cstdio> 
#include <cstdint> 
#include <cstdlib> 


void fillArray(int16_t* a, int16_t* b, int n){ 
    std::srand(1); 
     int i = 0; 
     do 
     { 
      a[i] = rand() % 50; 
      b[i] = rand() % 10; 
      i++; 
     } while (i < n); 
} 

void printArray(int16_t* a, int16_t* b, int n){ 
    int i = 0; 
    do 
    { 
     printf("a[%d]: %d; b[%d]: %d\n", i, a[i], i, b[i]); 
     i++; 
    } while (i < n); 
} 
//control operation 
int16_t dotCpp(int16_t* a, int16_t* b, int n){ 
    int16_t dotProd; 
    int i = 0; 
    do 
    { 
     dotProd += a[i] * b[i]; 
     i++; 
    } while (i < n); 
    return dotProd; 
} 

extern "C" void dotAsm_(int16_t* a, int16_t* b, int16_t *dotProd); 

//dotAsm_ file 
section .data 


section .text 

     global dotAsm_ 

dotAsm_: 

     push  ebp 
     mov   ebp, esp 

     mov   eax, [ebp+8] ;load a 
     mov   ebx, [ebp+12] ;load b 
     mov   ecx, [ebp+16] ;load address of dotProd 

     movq  mm0, [eax]  ;move content of eax to mm0 
     movq  mm1, [ebx]  ;move content of ebx to mm1 

     movq  mm2, mm0  ;copy mm0 

     pmaddwd  mm2, mm1  ;multiply and add 
     movq  mm3, mm2  ;copy mm2 to mm3 
     psrlq  mm3, 32   ;shift mm3 by 32 bits to the right putting the higher-order bits into the lower-order bits 
     paddd  mm2, mm3  ;add lower-order bits saving result in mm2 

     punpcklwd mm4, mm2  ;unpack the lower order bits 
     psrld  mm4, 16   ;shift right by 16 bits, get the result of the addition 

     movq  [ecx], mm4  ;move result back to the register 

     pop   ebp 
     emms 
     ret 

int main(int argc, char *argv[]) 
{ 

    int n = 4; 
    int16_t sum = 0; 
    int16_t *dot; 
    int16_t a[n], b[n]; 

    fillArray(a, b, n); 
    printArray(a, b, n); 
    sum = dotCpp(a, b, n); 
    printf("dotprod: %d\n", sum); 


    dotAsm_(a, b, dot); 
    // printf("ASM dotprod: %i\n", &dot); 

    return 0; 
} 

从makefile文件编译时:在

CXX = g++ 
CXXFLGS = -g -Wall -std=c++11 
SRC = main.o innerProd.o 
EXEC = innerProd 

$(EXEC): $(SRC) 
    $(CXX) $(CXXFLGS) $(SRC) -o $(EXEC) 

innerProd.o: innerProd.asm 
    nasm -f elf -F stabs innerProd.asm -o innerProd.o 

main.o: innerProd.cpp 
    $(CXX) $(CXXFLGS) -c innerProd.cpp -o main.o 

结果:

a[0]: 33; b[0]: 6 
a[1]: 27; b[1]: 5 
a[2]: 43; b[2]: 5 
a[3]: 36; b[3]: 2 
dotprod: 620 
Segmentation fault (core dumped) 

使用gdb来分析原因说明如下:

//having successfully performed the neccessary calculations: 
mm4 {uint64 = 0x26c, v2_int32 = {0x26c, 0x0} ...} 
//hence the correct result 0x26c = (dec) 620 has been obtained however loading it back 
//into the register causes the segmentation fault. 
(gdb) ni 
Program received signal SIGSEGV, Segmentation fault. 
0x08048709 in dotAsm_() 

我不知道为什么我不能将结果返回寄存器。任何建议,高度赞赏。

非常感谢您提前多次。

文森特

+2

在调用之前不应该有'dot'指向的地方吗? –

回答

2

的问题是不能在汇编代码,但在main

int16_t *dot; 

这是一个未初始化的指针;它可以指向任何地方,这通常意味着一个不属于你的随机地址。因此,段错误的位置:

movq  [ecx], mm4 

最快的解决办法是更换

int16_t *dot; 

由:

int16_t dot[1]; 

虽然我会更倾向于做点直整型变量:

int16_t dot; 

而且n将其地址传递给汇编例程:

dotAsm_(a, b, &dot); 
printf("ASM dotprod: %i\n", dot);