2017-08-11 84 views
-1

我试图在FPGA(Zynq ZC 702)中实现用于小波变换的C代码,但代码卡住了,这是因为内存问题,所以我应该优化我的代码,但我不知道如何。如何优化我的c代码?

任何人都可以请给我一些想法如何做到这一点?

这是主代码

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <math.h> 
#include "wavemin.h" 
#include "waveaux.h" 
#include "waveaux.c" 
#include "wavemin.c" 

int main() { 
    printf("Hello World1 \n\r"); 
    wave_object obj; 
    wt_object wt; 
    float *inp, *out; 
    int N, i, J,k; 
    float temp[1280] = {}; 
    char *name = "db4"; 
    obj = wave_init(name); 

    printf("Hello World2 \n\r"); 
    N = 1280; 
    inp = (float*)malloc(sizeof(float) * N); 
    out = (float*)malloc(sizeof(float) * N); 

    //wmean = mean(temp, N); 
    for (i = 0; i < N; ++i) { 
     inp[i] = temp[i]; 

     printf("Hello World3 \n\r"); 
     //printf("%g \n", inp[i]); 
    } 

    J = 4; //Decomposition Levels 
    wt = wt_init(obj, "dwt", N, J); // Initialize the wavelet transform object 
    printf("Hello World4 \n\r"); 
    setDWTExtension(wt, "sym");  // Options are "per" and "sym". Symmetric is the default option 
    printf("Hello World5 \n\r"); 

    setWTConv(wt, "direct"); 
    printf("Hello World6 \n\r"); 
    dwt(wt, inp);  // Perform DWT 
    printf("Hello World7 \n\r"); 

    //getDWTAppx(wt, out, wt->length[0]); 
    // printf("Approximation Coefficients Level 1 \n"); 
    // for (i = 0; i < wt->length[0]; ++i) { 
    // printf("%g ", out[i]); 
    // } 
    // printf("\n\n"); 
    for (k = 1; k <= J; ++k) { 
     getDWTDetail(wt, out, wt->length[k], k); 
     printf("Detail Coefficients Level %d Length %d \n", 
       k, wt - length[k]); 
     for (i = 0; i < wt->length[k]; ++i) { 
      printf("%g ", out[i]); 
     } 
     printf("\n\n"); 
    } 
    wt_summary(wt);// Prints the full summary. 
    printf("Hello World8 \n\r"); 
    wave_free(obj); 
    wt_free(wt); 
    free(inp); 
    free(out); 

    return 0; 
} 

代码的其他部分,其中有在main功能使用的功能:

#include "wavemin.h" 

wave_object wave_init(char *wname) { 
    wave_object obj = NULL; 
    int retval; 
    retval = 0; 

    if (wname != NULL) { 
     retval = filtlength(wname); 
    } 

    obj = (wave_object)malloc(sizeof(struct wave_set) + sizeof(float) * 4 * 
           retval); 

    obj->filtlength = retval; 
    obj->lpd_len = obj->hpd_len = obj->lpr_len = obj->hpr_len = obj->filtlength; 
    strcpy(obj->wname, wname); 
    if (wname != NULL) { 
     filtcoef(wname, obj->params, obj->params + retval, obj->params + 2 * 
       retval, obj->params + 3 * retval); 
    } 
    obj->lpd = &obj->params[0]; 
    obj->hpd = &obj->params[retval]; 
    obj->lpr = &obj->params[2 * retval]; 
    obj->hpr = &obj->params[3 * retval]; 

    return obj; 
} 

wt_object wt_init(wave_object wave, char *method, int siglength, int J) { 
    int size, i, MaxIter; 
    wt_object obj = NULL; 

    size = wave->filtlength; 

    MaxIter = wmaxiter(siglength, size); 

    if (!strcmp(method, "dwt") || !strcmp(method, "DWT")) { 
     obj = (wt_object)malloc(sizeof(struct wt_set) + sizeof(float) * 
           (siglength + 2 * J * (size + 1))); 
     obj->outlength = siglength + 2 * J * (size + 1); // Default 
     strcpy(obj->ext, "sym"); // Default 
    } 

    obj->wave = wave; 
    obj->siglength = siglength; 
    obj->J = J; 
    obj->MaxIter = MaxIter; 
    strcpy(obj->method, method); 

    if (siglength % 2 == 0) { 
     obj->even = 1; 
    } 
    else { 
     obj->even = 0; 
    } 

    strcpy(obj->cmethod, "direct"); // Default 
    obj->cfftset = 0; 
    obj->lenlength = J + 2; 
    obj->output = &obj->params[0]; 
    if (!strcmp(method, "dwt") || !strcmp(method, "DWT")) { 
     for (i = 0; i < siglength + 2 * J * (size + 1); ++i) { 
      obj->params[i] = 0.0; 
     } 
    } 
    //wave_summary(obj->wave); 

    return obj; 
} 


static void dwt_sym(wt_object wt, float *inp, int N, float *cA, int len_cA, 
        float *cD, int len_cD) { 
    int i, l, t, len_avg; 

    len_avg = wt->wave->lpd_len; 

    for (i = 0; i < len_cA; ++i) { 
     t = 2 * i + 1; 
     cA[i] = 0.0; 
     cD[i] = 0.0; 
     for (l = 0; l < len_avg; ++l) { 
      if ((t - l) >= 0 && (t - l) < N) { 
       cA[i] += wt->wave->lpd[l] * inp[t - l]; 
       cD[i] += wt->wave->hpd[l] * inp[t - l]; 
       printf("world1 \n\r"); 
      } 
      else if ((t - l) < 0) { 
       cA[i] += wt->wave->lpd[l] * inp[-t + l - 1]; 
       cD[i] += wt->wave->hpd[l] * inp[-t + l - 1]; 
       printf("world2 \n\r"); 
      } 
      else if ((t - l) >= N) { 
       cA[i] += wt->wave->lpd[l] * inp[2 * N - t + l - 1]; 
       cD[i] += wt->wave->hpd[l] * inp[2 * N - t + l - 1]; 

       printf("world3 \n\r"); 
      } 
     } 
    } 
} 

void dwt(wt_object wt, float *inp) { 
    int i, J, temp_len, iter, N, lp; 
    int len_cA; 
    float *orig, *orig2; 

    temp_len = wt->siglength; 
    J = wt->J; 
    wt->length[J + 1] = temp_len; 
    wt->outlength = 0; 
    wt->zpad = 0; 
    orig = (float*)malloc(sizeof(float) * temp_len); 
    orig2 = (float*)malloc(sizeof(float) * temp_len); 

    for (i = 0; i < wt->siglength; ++i) { 
     orig[i] = inp[i]; 
     printf("Hello1 \n\r"); 
    } 

    if (wt->zpad == 1) { 
     orig[temp_len - 1] = orig[temp_len - 2]; 
     printf("Hello2 \n\r"); 
    } 

    N = temp_len; 
    lp = wt->wave->lpd_len; 

    if (!strcmp(wt->ext, "sym")) { 
     //printf("\n YES %s \n", wt->ext); 
     i = J; 
     while (i > 0) { 
      N = N + lp - 2; 
      N = (int)ceil((float)N/2.0); 
      wt->length[i] = N; 
      wt->outlength += wt->length[i]; 
      i--; 
     } 
     wt->length[0] = wt->length[1]; 
     wt->outlength += wt->length[0]; 
     N = wt->outlength; 
     printf("Hello3 \n\r"); 

     for (iter = 0; iter < J; ++iter) { 
      len_cA = wt->length[J - iter]; 
      N -= len_cA; 
      dwt_sym(wt, orig, temp_len, orig2, len_cA, wt->params + N, len_cA); 
      temp_len = wt->length[J - iter]; 
      printf("Hello4 \n\r"); 

      if (iter == J - 1) { 
       for (i = 0; i < len_cA; ++i) { 
        wt->params[i] = orig2[i]; 
        printf("Hello5 \n\r"); 
       } 
      } else { 
       for (i = 0; i < len_cA; ++i) { 
        orig[i] = orig2[i]; 
        printf("Hello6 \n\r"); 
       } 
      } 
     } 
    } else { 
     printf("Signal extension can be either per or sym"); 
     exit(-1); 
    } 

    free(orig); 
    free(orig2); 
} 

void setDWTExtension(wt_object wt, char *extension) { 
    if (!strcmp(extension, "sym")) { 
     strcpy(wt->ext, "sym"); 
    } else { 
     printf("Signal extension can be either per or sym"); 
     exit(-1); 
    } 
} 

void setWTConv(wt_object wt, char *cmethod) { 
    if (!strcmp(cmethod, "direct")) { 
     strcpy(wt->cmethod, "direct"); 
    } 
} 

void getDWTDetail(wt_object wt, float *detail, int N, int level) { 
    /* 
     returns Detail coefficents at the jth level where j = 1,2,.., J 
     and Wavelet decomposition is stored as 
     [A(J) D(J) D(J-1) ..... D(1)] in wt->output vector 
     Use getDWTAppx() to get A(J) 
     Level 1 : Length of D(J), ie N, is stored in wt->length[1] 
     Level 2 :Length of D(J-1), ie N, is stored in wt->length[2] 
     .... 
     Level J : Length of D(1), ie N, is stored in wt->length[J] 
    */ 
    int i, iter, J; 
    J = wt->J; 

    if (level > J) { 
     printf("The decomposition only has %d levels", J); 
    } 

    iter = wt->length[0]; 

    for (i = 1; i < level; ++i) { 
     iter += wt->length[i]; 
    } 

    for (i = 0; i < N; ++i) { 
     detail[i] = wt->output[i + iter]; 
    } 
} 

void getDWTAppx(wt_object wt, float *appx, int N) { 
    /* 
     Wavelet decomposition is stored as 
     [A(J) D(J) D(J-1) ..... D(1)] in wt->output vector 

     Length of A(J) , N = wt->length[0] 
    */ 
    int i; 

    for (i = 0; i < N; ++i) { 
     appx[i] = wt->output[i]; 
    } 
} 

void wt_summary(wt_object wt) { 
    int i; 
    int J, t; 
    J = wt->J; 

    printf("Wavelet Coefficients are contained in vector : %s \n", "output"); 
    printf("\n"); 
    printf("Approximation Coefficients \n"); 
    printf("Level %d Access : output[%d] Length : %d \n", 
      1, 0, wt->length[0]); 
    printf("\n"); 
    printf("Detail Coefficients \n"); 
    t = wt->length[0]; 
    for (i = 0; i < J; ++i) { 
     printf("Level %d Access : output[%d] Length : %d \n", 
       i + 1, t, wt->length[i + 1]); 
     t += wt->length[i + 1]; 
    } 
    printf("\n"); 

} 
void wave_free(wave_object object) { 
    free(object); 
} 

void wt_free(wt_object object) { 
    free(object); 
} 

enter image description here

+3

请正确格式化您的代码。 –

+0

也许这个问题更适合[code review](https://codereview.stackexchange.com/)? – xander

+0

如果您的代码正在工作,而您只是在寻找优化提示,那么您的位置不对。 StackOverflow用于解决代码的特定问题。 “太慢”太广泛。请检查https://codereview.stackexchange.com/ – Yunnosch

回答

2

在您的代码

  1. 务必检查是否的malloc返回非NULL值

  2. 检查栈和堆设置在连接文件中声明巨大的局部变量,并做了很多mallocs的 - 我怀疑(学校名称预兆)栈溢出,或者失败的malloc。

它是裸机程序,还是在某种操作系统下运行它?

+1

代码我会添加:停止铸造'malloc()'的输出,这是C++特有的,而不是C。 – perror

+0

但是它并没有伤害,因为他包含'stdlib',即使编译器在C11之前它也不会导致程序崩溃。风格 –

+1

不仅关于样式,它还可能掩盖一些可以在编译时检测到的错误。例如,看到这个[SO问题](https://stackoverflow.com/questions/605845/do-i-cast-the (malloc的结果)(第一个答案的最后一点) – perror

0

只是为了风格和简洁的事,我会改写这个:

if (siglength % 2 == 0) { 
     obj->even = 1; 
    } 
    else { 
     obj->even = 0; 
    } 

为以下代码:

obj->even = !(siglength % 2); 

,或者:

obj->even = (siglength % 2) ? 0 : 1; 

而且,我认为这个功能还有优化的空间:

static void dwt_sym(wt_object wt, float *inp, int N, float *cA, int len_cA, 
        float *cD, int len_cD) { 
    int i, l, t, len_avg; 

    len_avg = wt->wave->lpd_len; 

    for (i = 0; i < len_cA; ++i) { 
     t = 2 * i + 1; 
     cA[i] = 0.0; 
     cD[i] = 0.0; 
     for (l = 0; l < len_avg; ++l) { 
      if ((t - l) >= 0 && (t - l) < N) { 
       cA[i] += wt->wave->lpd[l] * inp[t - l]; 
       cD[i] += wt->wave->hpd[l] * inp[t - l]; 
       printf("world1 \n\r"); 
      } 
      else if ((t - l) < 0) { 
       cA[i] += wt->wave->lpd[l] * inp[-t + l - 1]; 
       cD[i] += wt->wave->hpd[l] * inp[-t + l - 1]; 
       printf("world2 \n\r"); 
      } 
      else if ((t - l) >= N) { 
       cA[i] += wt->wave->lpd[l] * inp[2 * N - t + l - 1]; 
       cD[i] += wt->wave->hpd[l] * inp[2 * N - t + l - 1]; 

       printf("world3 \n\r"); 
      } 
     } 
    } 
} 

首先,你总是提到t - 1永不t本身,所以为什么不能有:

t = 2 * i; 

而且,我可以猜测,大量的运算,可以放置在内循环之外..如果你想优化,这里有很多好的候选人。

关于优化的最后一句话!

您应该首先对您的软件进行配置,然后在考虑优化之前查看您最花费的时间。如果不知道你的软件真的在哪里挣扎,你无法优化“在空中”。考虑使用gprof

PS:你永远也不会使用作​​为一个变量信l(ELL)......那是一种从数1(一个)关闭。考虑改变这也是,它可以改善阅读。