2016-01-21 53 views
0

我想写一些c代码来使用scalapack中的pzheevd例程来查找大型矩阵的所有特征值。我有以下简单的例子,它已经硬编码了一个简单的4x4矩阵。使用单个进程,2个进程或4个我可以得到正确的特征值(-2.0396,-2,2,2.0396)。然而,使用不相称的数字(如3)返回的特征值是不正确的,即使它看起来像所有矩阵元素被正确分配。Scalapack返回错误的答案

要构建的代码使用:

mpicc -g test.c -llapack -o test -lblacs-openmpi -lblacsCinit-openmpi -L/usr/local/lib -lscalapack -lgfortran -lm -llapack -lblas 

实例的作品:

$ mpirun -n 1 ./test 
Info: 0 
Eigenvalues: -2.039608 -2.000000 2.000000 2.039608 

并没有一个:

$ mpirun -n 3 ./test 
Info: 0 
Eigenvalues: -2.223729 -1.805190 2.003994 2.024926 

,代码:

#include <stdio.h> 
#include <math.h> 
#include <stdlib.h> 
#include "mpi.h" 

typedef struct complex16{double dr,di;} complex16; 

extern void Cblacs_get(int context, int what, int *val); 
extern void Cblacs_gridinit(int* context, char* order, 
          int nproc_rows, int nproc_cols); 
extern void Cblacs_pcoord(int context, int p, 
          int* my_proc_row, int* my_proc_col); 
extern void Cblacs_exit(int doneflag); 
extern void descinit_(int* descrip, int* m, int* n, 
         int* row_block_size, int* col_block_size, 
         int* first_proc_row, int* first_proc_col, 
         int* blacs_grid, int* leading_dim, 
         int* error_info); 
extern int numroc_(int* order, int* block_size, 
        int* my_process_row_or_col, int* first_process_row_or_col, 
        int* nproc_rows_or_cols); 
extern void pzheevd_(char *jobz, char *uplo, int *n, complex16 *a, int *ia, int *ja, int *desca, double *w, complex16 *z, int *iz, int *jz, int *descz, complex16 *work, int *lwork, double *rwork, int *lrwork, int *iwork, int *liwork, int *info); 

main(int argc, char** argv) { 
    int my_rank, size, m, n; 
    int row_block_size=1, col_block_size=1; 
    int nproc_rows, nproc_cols; 
    int my_process_row, my_process_col; 
    int blacs_grid; 
    int first_proc_row = 0, first_proc_col = 0; 
    int descrip[9], info, nlocal_rows, nlocal_cols; 
    int i,j; 
    int leading_dim; 
    m=4; n=4; 

    MPI_Init(&argc, &argv); 
    MPI_Comm_size(MPI_COMM_WORLD, &size); 
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); 

    nproc_rows = sqrt(size); 
    nproc_cols = size/nproc_rows; 

    Cblacs_get(0, 0, &blacs_grid); 
    Cblacs_gridinit(&blacs_grid, "R", nproc_rows, nproc_cols); 
    Cblacs_pcoord(blacs_grid, my_rank, &my_process_row,&my_process_col); 

    nlocal_rows = numroc_(&m, &row_block_size, &my_process_row, &first_proc_row, &nproc_rows); 
    nlocal_cols = numroc_(&n, &col_block_size, &my_process_col, &first_proc_col, &nproc_cols); 
    leading_dim = numroc_(&m, &col_block_size, &my_process_row, &first_proc_col, &nproc_rows); 
    descinit_(descrip, &m, &n, &row_block_size, &col_block_size, &first_proc_row, &first_proc_col, &blacs_grid, &leading_dim, &info); 

    complex16 *a, *z; 
    double *w; 
    a = (complex16*)malloc(nlocal_rows * nlocal_cols * sizeof(complex16)); 
    z = (complex16*)malloc(nlocal_rows * nlocal_cols * sizeof(complex16)); 
    w = (double*)malloc(n * sizeof(double)); 

    double *mat_els; 
    mat_els = (double *)malloc(n*m * sizeof(double)); 
    mat_els[0] = -2.0;mat_els[1]=-0.2; mat_els[2] = -0.2; mat_els[3] = 0.0; 
    mat_els[4] = -0.2;mat_els[5]=2.0; mat_els[6] = 0.0; mat_els[7] = -0.2; 
    mat_els[8] = -0.2;mat_els[9]=0.0; mat_els[10] = 2.0; mat_els[11] = -0.2; 
    mat_els[12] = 0.0;mat_els[13]=-0.2; mat_els[14] = -0.2; mat_els[15] = -2.0; 

    int full_row, full_col; 
    for(i = 0; i < nlocal_rows; i++) 
    { 
     for(j = 0; j < nlocal_cols; j++) 
     { 
      full_row = i * nproc_rows + my_process_row; 
      full_col = j * nproc_cols + my_process_col; 
      a[(i*nlocal_cols + j)].dr = mat_els[full_row * m + full_col]; 
      a[(i*nlocal_cols + j)].di = 0.0; 
     } 
    } 
    char jobz = 'V'; // N not implemented yet. 
    char uplo = 'U'; 
    int ai = 1, aj = 1, zi = 1, zj = 1; 

    double *rwork; 
    complex16 *work; 
    int *iwork; 
    int lwork, lrwork, liwork; 

    rwork = (double*)malloc(2 * sizeof(double)); 
    work = (complex16*)malloc(2 * sizeof(complex16)); 
    iwork = (int*)malloc(2 * sizeof(int)); 

    lwork = -1; lrwork = -1; liwork = -1; 
    pzheevd_(&jobz, &uplo, &n, a, &ai, &aj, descrip, w, z, &zi, &zj, descrip, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); 
    lwork = work[0].dr; lrwork = rwork[0]; liwork = iwork[0]; 
    free(work); free(rwork); free(iwork); 

    rwork = (double*)malloc(lrwork * sizeof(double)); 
    work = (complex16*)malloc(lwork * sizeof(complex16)); 
    iwork = (int*)malloc(liwork * sizeof(int)); 
    pzheevd_(&jobz, &uplo, &n, a, &ai, &aj, descrip, w, z, &zi, &zj, descrip, work, &lwork, rwork, &lrwork, iwork, &liwork, &info); 

    if (my_rank == 0) 
    { 
     printf("Info: %d\n", info); 
     printf("Eigenvalues: "); 
     for(i = 0; i < n;i++) 
      { 
      printf("%lf ", w[i]); 
      } 
     printf("\n"); 
    } 
    free(w);free(z);free(a); 
    free(work);free(iwork);free(rwork); 
    Cblacs_exit(1); 
    MPI_Finalize(); 
} 

回答

1

我发现了我应该早点猜到的解决方案。矩阵元素应该使用Fortran列顺序格式而不是C样式行顺序格式来提供。将for循环中的元素分配更改为以下内容可以解决问题,并且现在可以为所有数量的进程(可以形成有效的网格)找到相同的特征值。

a[(j*nlocal_rows + i)].dr = mat_els[full_row * m + full_col]; 
a[(j*nlocal_rows + i)].di = 0.0;