2017-02-19 66 views
1

我写了一个程序(希望)可以计算两个信号之间的互相关。虽然我已经完成了如何执行计算的很好的搜索,但我无法弄清楚一些重要的细节。我特别关心平均计算。似乎有些算法利用整个数据集的平均值来执行每次移位(或延迟)的相关计算。换句话说,他们使用不变的平均值。我甚至发现了一些只计算一次分母的算法,将它用作其余延迟的常量值。但是,我认为平均值和分母都应该迭代计算,只考虑叠加范围内的数据。因此,我为这个程序写了两个版本。他们似乎在较小的延迟中产生非常相似的结果。我想知道哪一个是正确的。如何正确计算互相关?

迭代平均:

#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 

FILE *input1, *input2, *output; 
int m = 0, n = 0, t; 
float *VarA, *VarB, *Results, *Results2; 

void open_inputs_and_output(); 

void count_and_check_lines(); 

void allocate_memory(); 

void read_data(); 

void allocate_memory2(); 

void write_output(); 

int main() 
{ 
    float SumAverageA = 0, SumAverageB = 0, AverageA, AverageB, SubA, SubB, SumAB = 0, SumAs = 0, SumBs = 0, Correl; 
    int p = 0, i, delay; 

    open_inputs_and_output(); 

    count_and_check_lines(); 

    rewind(input1); 
    rewind(input2); 

    allocate_memory(); 

    read_data(); 

    fclose(input1); 
    fclose(input2); 

    printf("How many lag steps from the origin do you want to calculate?\nIf you want as many steps as the number of input points, type -1.\n"); 
    scanf("%i", &p); 

    if(p < -1) 
    { 
     printf("Bad number!\n"); 
     exit(1); 
    } 
    else if(p == -1) 
     t = n; 
    else 
     t = p; 

    allocate_memory2(); 

    printf("\nWait...\n\n"); 

    for(delay = 0; delay < t; delay++) 
    { 
     for(i = delay; i < n; i ++) 
     { 
      SumAverageA += VarA[i]; 
      SumAverageB += VarB[(i - delay)]; 
     } 

     AverageA = SumAverageA/(n - delay); 
     AverageB = SumAverageB/(n - delay); 

     for(i = delay; i < n; i++) 
     { 
      SubA = VarA[i] - AverageA; 
      SubB = VarB[(i - delay)] - AverageB; 
      SumAB += (SubA * SubB); 
      SumAs += (SubA * SubA); 
      SumBs += (SubB * SubB); 
     } 

     Correl = SumAB/(sqrt(SumAs * SumBs)); 

     Results[delay] = Correl; 

     SumAverageA = 0; 
     SumAverageB = 0; 
     SumAB = 0; 
     SumAs = 0; 
     SumBs = 0; 

     for(i = delay; i < n; i++) 
     { 
      SubB = VarB[i] - AverageB; 
      SubA = VarA[(i - delay)] - AverageA; 
      SumAB += (SubA * SubB); 
      SumAs += (SubA * SubA); 
      SumBs += (SubB * SubB); 
     } 

     Correl = SumAB/(sqrt(SumAs * SumBs)); 

     Results2[delay] = Correl; 

     SumAverageA = 0; 
     SumAverageB = 0; 
     SumAB = 0; 
     SumAs = 0; 
     SumBs = 0; 
    } 

    printf("Calculations performed.\n"); 

    free(VarA); 
    free(VarB); 

    write_output(); 

    free(Results); 
    free(Results2); 

    fclose(output); 

    return 0; 
} 

void open_inputs_and_output() 
{ 
    input1 = fopen("C:\\...\\test.txt","r"); 

    if (input1 == NULL) 
    { 
     printf("Error! Could not open input 1.\n"); 
     exit(1); 
    } 
    else 
     printf("Input1 opening: OK.\n"); 

    input2 = fopen("C:\\...\\test2.txt","r"); 

    if (input2 == NULL) 
    { 
     printf("Error! Could not open input 2.\n"); 
     exit(1); 
    } 
    else 
     printf("Input2 opening: OK.\n"); 

    output = fopen("C:\\...\\out.txt","w"); 

    if (output == NULL) 
    { 
     printf("Error! Could not open output.\n"); 
     exit(1); 
    } 
    else 
     printf("Output opening: OK.\n"); 
} 

void count_and_check_lines() 
{ 
    float k; 

    while(fscanf(input1,"%f",&k) == 1) 
     n++; 

    printf("n = %i\n", n); 

    while(fscanf(input2,"%f",&k) == 1) 
     m++; 

    printf("m = %i\n", m); 

    if(m != n) 
    { 
     printf("Error: Number of rows does not match!\n"); 
     exit(1); 
    } 
    else 
     printf("Number of rows matches.\n"); 
} 

void allocate_memory() 
{ 
    VarA = calloc(n, sizeof(float)); 

    if(VarA == NULL) 
    { 
     printf("Could not allocate memory for VarA.\n"); 
     exit(1); 
    } 

    VarB = calloc(m, sizeof(float)); 

    if(VarA == NULL) 
    { 
     printf("Could not allocate memory for VarB.\n"); 
     exit(1); 
    } 
} 

void read_data() 
{ 
    int i; 

    for(i = 0; i < n; i++) 
     fscanf(input1,"%f",&VarA[i]); 

    printf("Data A successfully read.\n"); 

    for(i = 0; i < m; i++) 
     fscanf(input2,"%f",&VarB[i]); 

    printf("Data B successfully read.\n"); 
} 

void allocate_memory2() 
{ 
    Results = calloc(t, sizeof(float)); 

    if(Results == NULL) 
    { 
     printf("Could not allocate memory for Results.\n"); 
     exit(1); 
    } 

    Results2 = calloc(t, sizeof(float)); 

    if(Results2 == NULL) 
    { 
     printf("Could not allocate memory for Results2.\n"); 
     exit(1); 
    } 
} 

void write_output() 
{ 
    int i; 

    for(i = t - 1; i > 0; i--) 
     fprintf(output,"-%i %f\n", i , Results2[i]); 

    for(i = 0; i < t; i++) 
     fprintf(output,"%i %f\n", i , Results[i]); 

    printf("Results written.\n"); 
} 

恒定平均:

#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 

FILE *input1, *input2, *output; 
int m = 0, n = 0, t; 
float *VarA, *VarB, *Results, *Results2; 

void open_inputs_and_output(); 

void count_and_check_lines(); 

void allocate_memory(); 

void read_data(); 

void allocate_memory2(); 

void write_output(); 

int main() 
{ 
    float SumAverageA = 0, SumAverageB = 0, AverageA, AverageB, SubA, SubB, SumAB = 0, SumAs = 0, SumBs = 0, Correl; 
    int p = 0, i, delay; 

    open_inputs_and_output(); 

    count_and_check_lines(); 

    rewind(input1); 
    rewind(input2); 

    allocate_memory(); 

    read_data(); 

    fclose(input1); 
    fclose(input2); 

    printf("How many lag steps from the origin do you want to calculate?\nIf you want as many steps as the number of input points, type -1.\n"); 
    scanf("%i", &p); 

    if(p < -1) 
    { 
     printf("Bad number!\n"); 
     exit(1); 
    } 
    else if(p == -1) 
     t = n; 
    else 
     t = p; 

    allocate_memory2(); 

    printf("\nWait...\n\n"); 

    for(i = 0; i < n; i ++) 
    { 
     SumAverageA += VarA[i]; 
     SumAverageB += VarB[i]; 
    } 

    AverageA = SumAverageA/n; 
    AverageB = SumAverageB/n; 

    for(delay = 0; delay < t; delay++) 
    { 
     for(i = delay; i < n; i++) 
     { 
      SubA = VarA[i] - AverageA; 
      SubB = VarB[(i - delay)] - AverageB; 
      SumAB += (SubA * SubB); 
      SumAs += (SubA * SubA); 
      SumBs += (SubB * SubB); 
     } 

     Correl = SumAB/(sqrt(SumAs * SumBs)); 

     Results[delay] = Correl; 

     SumAverageA = 0; 
     SumAverageB = 0; 
     SumAB = 0; 
     SumAs = 0; 
     SumBs = 0; 

     for(i = delay; i < n; i++) 
     { 
      SubB = VarB[i] - AverageB; 
      SubA = VarA[(i - delay)] - AverageA; 
      SumAB += (SubA * SubB); 
      SumAs += (SubA * SubA); 
      SumBs += (SubB * SubB); 
     } 

     Correl = SumAB/(sqrt(SumAs * SumBs)); 

     Results2[delay] = Correl; 

     SumAverageA = 0; 
     SumAverageB = 0; 
     SumAB = 0; 
     SumAs = 0; 
     SumBs = 0; 
    } 

    printf("Calculations performed.\n"); 

    free(VarA); 
    free(VarB); 

    write_output(); 

    free(Results); 
    free(Results2); 

    fclose(output); 

    return 0; 
} 

void open_inputs_and_output() 
{ 
    input1 = fopen("C:\\...\\test.txt","r"); 

    if (input1 == NULL) 
    { 
     printf("Error! Could not open input 1.\n"); 
     exit(1); 
    } 
    else 
     printf("Input1 opening: OK.\n"); 

    input2 = fopen("C:\\...\\test2.txt","r"); 

    if (input2 == NULL) 
    { 
     printf("Error! Could not open input 2.\n"); 
     exit(1); 
    } 
    else 
     printf("Input2 opening: OK.\n"); 

    output = fopen("C:\\...\\out.txt","w"); 

    if (output == NULL) 
    { 
     printf("Error! Could not open output.\n"); 
     exit(1); 
    } 
    else 
     printf("Output opening: OK.\n"); 
} 

void count_and_check_lines() 
{ 
    float k; 

    while(fscanf(input1,"%f",&k) == 1) 
     n++; 

    printf("n = %i\n", n); 

    while(fscanf(input2,"%f",&k) == 1) 
     m++; 

    printf("m = %i\n", m); 

    if(m != n) 
    { 
     printf("Error: Number of rows does not match!\n"); 
     exit(1); 
    } 
    else 
     printf("Number of rows matches.\n"); 
} 

void allocate_memory() 
{ 
    VarA = calloc(n, sizeof(float)); 

    if(VarA == NULL) 
    { 
     printf("Could not allocate memory for VarA.\n"); 
     exit(1); 
    } 

    VarB = calloc(m, sizeof(float)); 

    if(VarA == NULL) 
    { 
     printf("Could not allocate memory for VarB.\n"); 
     exit(1); 
    } 
} 

void read_data() 
{ 
    int i; 

    for(i = 0; i < n; i++) 
     fscanf(input1,"%f",&VarA[i]); 

    printf("Data A successfully read.\n"); 

    for(i = 0; i < m; i++) 
     fscanf(input2,"%f",&VarB[i]); 

    printf("Data B successfully read.\n"); 
} 

void allocate_memory2() 
{ 
    Results = calloc(t, sizeof(float)); 

    if(Results == NULL) 
    { 
     printf("Could not allocate memory for Results.\n"); 
     exit(1); 
    } 

    Results2 = calloc(t, sizeof(float)); 

    if(Results2 == NULL) 
    { 
     printf("Could not allocate memory for Results2.\n"); 
     exit(1); 
    } 
} 

void write_output() 
{ 
    int i; 

    for(i = t - 1; i > 0; i--) 
     fprintf(output,"-%i %f\n", i , Results2[i]); 

    for(i = 0; i < t; i++) 
     fprintf(output,"%i %f\n", i , Results[i]); 

    printf("Results written.\n"); 
} 

参考文献:

http://www.jot.fm/issues/issue_2010_03/column2.pdf

http://paulbourke.net/miscellaneous/correlate/

+0

欢迎来到Stack Overflow。对不起,你在5个多小时内没有得到任何答复;这是相对不寻常的。既然你似乎已经找到了一些可以用各种方法做事情的算法,也许答案是“当算法是应该使用的算法时,每个算法都是正确的”,而你的问题是你不确定你应该在你的算法中使用哪种算法情况。你读过哪些参考资料?当算法适用时他们说什么?他们是否说过其他选择,以及为什么不应该使用其他选项? (请将其他信息添加到您的问题中,请不要作为评论。) –

+0

他们对替代方法没有多说。我的方法只是基于对相关方程和互相关概念的分析的逻辑推论。这可能是正确的,但也可能不正确。数据处理和统计不完全是我的领域。 – user3277482

回答

0

如果您的进程是wide sense stationary,那么平均值不会随着时间而改变。如果过程也是ergodic,那么可以通过计算单个时间序列的平均值来获得这些平均值。在这种情况下,您最好使用所有可用的样本来尽可能精确地估计平均值。这自然会导致您的“恒常平均”实施。

另一方面,如果你的过程不是广义的平稳和遍历的,那么获得他们当地方法的一个好的估计可能被证明是一个更大的挑战。估计过程近似平稳的较小时间窗内的平均值可能是一种合理的方法(这与您的“迭代平均值”实现类似)。请注意,还有其他方法,它们的适用性取决于具体的应用和特定信号的属性。

然后,您可能会想知道如何知道您的流程是否具有广义静态。不幸的是,除非你对这些过程如何产生了解很多,否则通常最好的做法是在假设过程是广义静止的假设下工作,然后尝试反驳该假设(通过观察超出预期范围的结果;参见statistical hypothesis testing)。

+0

谢谢。它使事情更清晰。我正在处理一种蛋白质的分子动力学轨迹,它在开放和闭合状态之间波动(寿命范围从5到几十纳秒)。考虑到在一个很大(但不足以导致蛋白质去折叠)时间尺度的平均值应该是恒定的,我相信测量的变量可以被认为是遍历的,但我不确定WSS。因为在较短的时间尺度上,平均值会发生变化,但如果数据在较大的时间尺度上累积,则平均值趋于恒定。 – user3277482