
First of all, I should say that I am French and my English is not very good. I am writing an MPI application in C in which a thread created with pthread_create calls the function MPI_Finalize.

I am working on this MPI application and I have run into a problem that I hope someone can help me with.

As stated in the title, I am trying to use a thread that listens for when I have to kill my application and then calls MPI_Finalize.

However, my application does not terminate correctly. More precisely, I get the following messages:

[XPS-2720:27441] *** Process received signal ***
[XPS-2720:27441] Signal: Segmentation fault (11)
[XPS-2720:27441] Signal code: Address not mapped (1)
[XPS-2720:27441] Failing at address: 0x7f14077a3b6d
[XPS-2720:27440] *** Process received signal ***
[XPS-2720:27440] Signal: Segmentation fault (11)
[XPS-2720:27440] Signal code: Address not mapped (1)
[XPS-2720:27440] Failing at address: 0x7fb11d07bb6d

mpirun noticed that process rank 1 with PID 27440 on node lagniez-XPS-2720 exited on signal 11 (Segmentation fault).

Here is my slave code:

#include "mpi.h" 
 
#include <stdio.h> 
 
#include <stdlib.h> 
 
#include <signal.h> 
 
#include <unistd.h> 
 
#include <sys/types.h> 
 
#include <pthread.h> 
 
#include <cassert> 
 

 

 
#define send_data_tag 1664 
 
#define send_kill_tag 666 
 

 
void *finilizeMPICom(void *intercomm) 
 
{ 
 
    printf("the finilizeMPICom was called\n"); 
 
    
 
    MPI_Comm parentcomm = * ((MPI_Comm *) intercomm); 
 
    MPI_Status status; 
 
    int res; 
 

 
    // sleep(10); 
 
    MPI_Recv(&res, 1, MPI_INT, 0, send_kill_tag, parentcomm, &status); 
 

 
    int rank; 
 
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); 
 
    printf("we receive something %d -- %d\n", rank, res); 
 
    
 
    MPI_Finalize(); 
 
    exit(0); 
 
}// finilizeMPICom 
 

 

 
int main(int argc, char *argv[]) 
 
{ 
 
    int numtasks, rank, len, rc; 
 
    char hostname[MPI_MAX_PROCESSOR_NAME]; 
 

 
    int provided, claimed; 
 
    rc = MPI_Init_thread(0, 0, MPI_THREAD_MULTIPLE, &provided); 
 
    MPI_Query_thread(&claimed); 
 
    
 
    if (rc != MPI_SUCCESS || provided != 3) 
 
    { 
 
     printf ("Error starting MPI program. Terminating.\n"); 
 
     MPI_Abort(MPI_COMM_WORLD, rc); 
 
    } 
 
    
 
    MPI_Comm_rank(MPI_COMM_WORLD,&rank); 
 

 
    MPI_Comm parentcomm; 
 
    MPI_Comm_get_parent(&parentcomm); 
 

 
    /* create a second thread to listen when we have to kill the program */ 
 
    pthread_t properlyKill; 
 
    if(pthread_create(&properlyKill, NULL, finilizeMPICom, (void *) &parentcomm)) 
 
    {  
 
     fprintf(stderr, "Error creating thread\n"); 
 
     return 0; 
 
    } 
 
    
 
    assert(parentcomm != MPI_COMM_NULL); 
 

 
    MPI_Status status; 
 
    int root_process, ierr, num_rows_to_receive; 
 

 
    int mode; 
 
    MPI_Recv(&mode, 1, MPI_INT, 0, send_data_tag, parentcomm, &status); 
 
    printf("c The solver works in the mode %d\n", mode); 
 

 
    printf("I sent a message %d\n", rank); 
 

 
    // if(rank != 1) sleep(100); 
 
    
 
    int res = 1; 
 
    MPI_Send(&res, 1, MPI_INT, 0, send_data_tag, parentcomm); 
 
    printf("we want to listen for somethiing %d\n", rank); 
 
    
 
    int rescc = 1; 
 
    MPI_Recv(&rescc, 1, MPI_INT, 0, send_data_tag, parentcomm, &status); 
 
    printf("I received the message %d %d\n", rescc, rank); 
 
    
 
    if(rescc == 1000) 
 
    { 
 
     printf("~~~~~~~~>>> I print the solution %d\n", rank); 
 
     int res3 = 1001; 
 
     MPI_Send(&res3, 1, MPI_INT, 0, send_data_tag, parentcomm); 
 
    } 
 
    else printf("I do not understand %d\n", rank); 
 

 
    printf("I wait the thread to kill the programm %d\n", rank); 
 
    pthread_join(properlyKill, (void**)&(res)); 
 
    return 0; 
 
}

And for the master I have:

int main(int argc, char **argv)
{
    Parser *p = new Parser("slave.xml");

    MPI_Init(&argc, &argv);
    if(p->method == "concurrent")
    {
        ConcurrentManager cc(p->instance, p->solvers);
        cc.run();
    }
    else
    {
        cerr << "c The only available methods are: concurrent, eps (Embarrassingly Parallel Search) or tree" << endl;
        exit(1);
    }

    delete(p);
    MPI_Finalize();
    exit(0);
}// main


/**
    Create a concurrent manager (means init the data structures to run
    the solvers).

    @param[in] _instance, the benchmark path
    @param[in] _solvers, the set of solvers that will be ran
*/
ConcurrentManager::ConcurrentManager(string _instance, vector<Solver> &_solvers) :
    instance(_instance), solvers(_solvers)
{
    cout << "c\nc Concurrent manager called" << endl;

    nbSolvers = _solvers.size();
    np = new int[nbSolvers];
    cmds = new char*[nbSolvers];
    arrayOfArgs = new char **[nbSolvers];
    infos = new MPI_Info[nbSolvers];

    for(int i = 0 ; i<nbSolvers ; i++)
    {
        np[i] = solvers[i].npernode;

        cmds[i] = new char[(solvers[i].executablePath).size() + 1];
        strcpy(cmds[i], (solvers[i].executablePath).c_str());

        arrayOfArgs[i] = new char *[(solvers[i].options).size() + 1];
        for(unsigned int j = 0 ; j<(solvers[i].options).size() ; j++)
        {
            arrayOfArgs[i][j] = new char[(solvers[i].options[j]).size() + 1];
            strcpy(arrayOfArgs[i][j], (solvers[i].options[j]).c_str());
        }
        arrayOfArgs[i][(solvers[i].options).size()] = NULL;

        MPI_Info_create(&infos[i]);

        char hostname[solvers[i].hostname.size()];
        strcpy(hostname, solvers[i].hostname.c_str());
        MPI_Info_set(infos[i], "host", hostname);
    }

    sizeComm = 0;
}// constructor


/**
    Wait that at least one process finish and return the code
    SOLUTION_FOUND.

    @param[in] intercomm, the communicator
*/
void ConcurrentManager::waitForSolution(MPI_Comm &intercomm)
{
    MPI_Status arrayStatus[sizeComm], status;
    MPI_Request request[sizeComm];
    int val[sizeComm], flag;

    for(int i = 0 ; i<sizeComm ; i++) MPI_Irecv(&val[i], 1, MPI_INT, i, TAG_MSG, intercomm, &request[i]);

    bool solutionFound = false;
    while(!solutionFound)
    {
        for(int i = 0 ; i<sizeComm ; i++)
        {
            MPI_Test(&request[i], &flag, &arrayStatus[i]);
            if(flag)
            {
                printf("---------------------> %d reveived %d\n", i , val[i]);
                if(val[i] == SOLUTION_FOUND)
                {
                    int msg = PRINT_SOLUTION;
                    MPI_Send(&msg, 1, MPI_INT, i, TAG_MSG, intercomm); // ask to print the solution

                    int msgJobFinished;
                    MPI_Recv(&msgJobFinished, 1, MPI_INT, i, TAG_MSG, intercomm, &status); // wait the answer
                    assert(msgJobFinished == JOB_FINISHED);

                    cout << "I am going to kill everybody" << endl;

                    int msgKill[sizeComm];
                    for(int j = 0 ; j<sizeComm ; j++)
                    {
                        msgKill[i] = STOP_AT_ONCE;
                        MPI_Send(&msgKill[i], 1, MPI_INT, j, TAG_KILL, intercomm);
                    }

                    solutionFound = true;
                    break;
                } else
                {
                    printf("restart the communication for %d\n", i);
                    MPI_Irecv(&val[i], 1, MPI_INT, i, TAG_MSG, intercomm, &request[i]);
                }
            }
        }
    }
}// waitForSolution


/**
    Run the solver.
*/
void ConcurrentManager::run()
{
    MPI_Comm intercomm;
    int errcodes[solvers.size()];

    MPI_Comm_spawn_multiple(nbSolvers, cmds, arrayOfArgs, np, infos, 0, MPI_COMM_WORLD, &intercomm, errcodes);

    MPI_Comm_remote_size(intercomm, &sizeComm);
    cout << "c Solvers are now running: " << sizeComm << endl;

    int msg = CONCU_MODE;
    for(int i = 0 ; i<sizeComm ; i++) MPI_Send(&msg, 1, MPI_INT, i, TAG_MSG, intercomm); // init the working mode

    waitForSolution(intercomm);
}// run

I know that I have posted a lot of code :(

But I do not know where the problem is.

Please help me :)

Best regards.


'Signal: Segmentation fault (11)' - a core file should have been created. Start by looking at it. – SergeyA

Answer


The MPI documentation for how MPI interacts with threads requires that the call to MPI_Finalize() be performed by the main thread, that is, by the same thread that initialized MPI. In your case, that is also the initial thread of your process.
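
As a side note, a process can check at runtime whether the calling thread is the one that initialized MPI by using MPI_Is_thread_main(). A minimal sketch, using a hypothetical helper name (finalizeIfMainThread) chosen only for this example:

#include <mpi.h>
#include <cstdio>

// Finalize only when called from the thread that ran MPI_Init_thread();
// otherwise emit a warning instead of violating the MPI requirement.
static void finalizeIfMainThread()
{
    int isMain = 0;
    MPI_Is_thread_main(&isMain);  // sets 1 in the thread that initialized MPI, 0 elsewhere
    if (isMain)
        MPI_Finalize();
    else
        fprintf(stderr, "MPI_Finalize() skipped: not the MPI main thread\n");
}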

To comply with MPI's requirements, you could reorganize your application so that the initial thread is the one that waits for the kill signal and then shuts down MPI. The other work it currently does would have to be moved to a different thread; see the sketch below.
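
A possible restructuring of the slave's main() along those lines, given only as a sketch (doWork is a placeholder standing in for the solver exchange that main() currently performs, and the process is assumed to have been spawned so that the parent communicator is valid):

#include <mpi.h>
#include <pthread.h>
#include <cstdio>

#define send_kill_tag 666

// Placeholder for everything the slave currently does after creating the thread
// (the MPI_Recv/MPI_Send exchange with the manager would move here).
static void *doWork(void *arg)
{
    MPI_Comm parentcomm = *static_cast<MPI_Comm *>(arg);
    (void)parentcomm;
    return NULL;
}

int main(int argc, char *argv[])
{
    int provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);

    MPI_Comm parentcomm;
    MPI_Comm_get_parent(&parentcomm);

    // The solver work now runs in a secondary thread ...
    pthread_t worker;
    pthread_create(&worker, NULL, doWork, &parentcomm);

    // ... while the initial thread (the one that called MPI_Init_thread)
    // waits for the kill message and is therefore the one allowed to finalize MPI.
    int res;
    MPI_Status status;
    MPI_Recv(&res, 1, MPI_INT, 0, send_kill_tag, parentcomm, &status);

    pthread_join(worker, NULL);
    MPI_Finalize();
    return 0;
}

Note that the worker thread should have finished before MPI_Finalize() is called, which is why the join happens first.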
