【问题标题:MPI_Bcast 矩阵乘法设置】
我正在使用MPI并行地计算两个矩阵(二维数组)的乘积:把矩阵的行均匀划分并分发给各个子进程,主进程(master)自己也负责计算其中一块行。我已经理解整体思路,并且用 MPI_Send/MPI_Recv 成功实现过;现在我想改用 MPI_Bcast 来实现,但无法确定应该在什么时机调用 Bcast、以及应该传什么缓冲区。当我在不同位置输出结果矩阵 C 时,发现并非所有行都被计算/更新了,我猜这可能是因为我没有正确指定缓冲区。
代码:
#include <iostream>
#include <stdlib.h>
#include <mpi.h>
#include <stdio.h>
#include <time.h>
using namespace std;
int main(int argc, char *argv[])
{
int myid, nproc;
int Ibuffer[200]; // Integer buffer, use proper size and type
double Dbuffer[2000]; // Double buffer, use proper size and type
char Sbuffer[200]; // String Buffer
int msg_len;
int i, j, k;
// initialize the MPI Environment and get the needed Data
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
// Get the name of processor
MPI_Get_processor_name(Sbuffer, &msg_len);
int RowA = 5,
ColA = 2,
RowB = ColA,
ColB = 3,
RowC = RowA,
ColC = ColB;
// Start clock
double start_time = MPI_Wtime();
// Initialize matrices
double **matA = new double*[RowA];
for (int i = 0; i < RowA; ++i)
matA[i] = new double[ColA];
double **matB = new double*[RowB];
for (int i = 0; i < RowB; ++i)
matB[i] = new double[ColB];
double **matC = new double*[RowC];
for (int i = 0; i < RowC; ++i)
matC[i] = new double[ColC];
for (int i = 0; i < RowA; i++) // MatA
{
for (int j = 0; j < ColA; j++)
{
matA[i][j] = 2;
}
}
for (int i = 0; i < RowB; i++) // MatB
{
for (int j = 0; j < ColB; j++)
{
matB[i][j] = 2;
}
}
for (int i = 0; i < RowC; i++) // MatC
{
for (int j = 0; j < ColC; j++)
{
matC[i][j] = 0;
}
}
// All procs compute the chunk size, no need to send separate
int chunk = RowA/nproc;
int rest = RowA % nproc;
int my_start_row = myid * chunk; // find my start row
int my_end_row = (myid + 1) * chunk; // find my end row
// assign rest ot last worker
if (myid == nproc-1) my_end_row += rest;
int Dcount = ColA * chunk; // Data count for A to send to worker
MPI_Status status; // Status variable neede for the receive
if (myid == 0)
{
// Send the rows needed for workers (Don't know if I need this or not)
//MPI_Bcast(matA, Dcount, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// Then work on your own part
for (int i= my_start_row; i < my_end_row; i++)
{
for(int j=0; j < ColB; j++)
{
for(int k=0; k < RowB; k++)
{
matC[i][j] = matC[i][j] + (matA[i][k] * matB[k][j]);
}
}
}
for (int n=1; n<nproc; n++)
{
MPI_Bcast(matC, Dcount, MPI_DOUBLE, n, MPI_COMM_WORLD);
printf("\n ==++ Master Receive Result by Worker[%d], \n", n);
}
}
else
{
// This is worker, receive the needed info and start working
//MPI_Bcast(matA, Dcount, MPI_DOUBLE, 0, MPI_COMM_WORLD);
//printf("\n +++ Worker[%d], recived %d rows from Master \n", myid, myid*chunk);
cout << "\n === Master sent rows " << myid * chunk << " through " << (myid+1) * chunk << " to process #" << myid << endl;
// Do the work first
for (int i= my_start_row; i < my_end_row; i++)
{
for(int j=0; j < ColB; j++)
{
for(int k=0; k < RowB; k++)
{
matC[i][j] = matC[i][j] + (matA[i][k] * matB[k][j]);
}
}
}
// Send the result to the Master
MPI_Bcast(matC, Dcount, MPI_DOUBLE, myid, MPI_COMM_WORLD);
printf("\n --- Worker[%d], Sent Result to Master \n", myid);
}
// End clock
double end_time = MPI_Wtime();
if (myid == 0) {
cout << "\nParallel Exec time: " << end_time - start_time << endl;
}
MPI_Finalize();
// Clean up and release the storage
for (int i=0; i< RowA; i++)
{
delete [] matA[i];
matA[i] = NULL;
}
delete [] matA;
matA = NULL;
for (int i=0; i< RowA; i++)
{
delete [] matC[i];
matC[i] = NULL;
}
delete [] matC;
matC = NULL;
}
如果这个问题太模糊或者太宽泛,我可以理解;我只是想确认一下,自己是否误解了应该如何、以及在什么时机使用 MPI_Bcast。