2012-04-28 92 views
1

我正在编写一个使用了一些外部 C 库的 C++ 程序。据我所知,这些库并不是问题的原因,问题出在我自己的 C++ 代码上。该程序在测试数据集上运行良好,没有任何错误,但在处理完几乎整个完整数据集之后,出现了段错误。在 GDB 下运行得到如下段错误信息(问题标题:C++ 段错误问题):

 
(gdb) run -speciesMain=allMis1 -speciesOther=anoCar2 -speciesMain=allMis1 -speciesOther=anoCar2 /hive/data/genomes/allMis1/bed/lastz.anoCar2/mafRBestNet/*.maf.gz 
Starting program: /cluster/home/jstjohn/bin/mafPairwiseSyntenyDecay -speciesMain=allMis1 -speciesOther=anoCar2 -speciesMain=allMis1 -speciesOther=anoCar2 /hive/data/genome 
s/allMis1/bed/lastz.anoCar2/mafRBestNet/*.maf.gz 
Detaching after fork from child process 3718. 

Program received signal SIGSEGV, Segmentation fault. 
0x0000003009cb7672 in __gnu_cxx::__exchange_and_add(int volatile*, int)() from /usr/lib64/libstdc++.so.6 
(gdb) up 
#1 0x0000003009c9db59 in std::basic_string, std::allocator >::~basic_string()() from /usr/lib64/libstdc++.so.6 
(gdb) up 
#2 0x00000000004051e7 in PairAlnInfo::~PairAlnInfo (this=0x7fffffffcd70, __in_chrg=) at mafPairwiseSyntenyDecay.cpp:37 
(gdb) up 
#3 0x0000000000404eb0 in main (argc=2, argv=0x7fffffffcf78) at mafPairwiseSyntenyDecay.cpp:260 

看起来像是我的 PairAlnInfo 类发生了双重释放(double free)。奇怪的是,我没有定义析构函数,也没有用 new 分配任何东西。我在 Linux 机器上用 g++44 和 g++ 4.1.2 都试过,结果相同。更奇怪的是,在我的 Linux 机器上(它的可用内存更多,整体配置也更强——并不是说内存是这个程序的瓶颈),段错误如上所示发生在程序进入打印输出的循环之前。而在配置低得多的 MacBook Air 上,无论使用 g++ 还是 clang++,程序同样会出现段错误,但要等到结果全部打印完、在 main 函数最后的 return(0) 之前才发生。下面是在我的 Mac 上用默认的 g++ 4.2 编译后,对同一个文件运行时的 GDB 跟踪:

 
(more results)... 
98000 27527 162181 0.83027 
99000 27457 161467 0.829953 
100000 27411 160794 0.829527 

Program received signal EXC_BAD_ACCESS, Could not access memory. 
Reason: KERN_INVALID_ADDRESS at address: 0x00004a2c00106077 
0x00007fff9365a6e5 in std::string::_Rep::_M_dispose() 
(gdb) up 
#1 0x00007fff9365a740 in std::basic_string, std::allocator >::~basic_string() 
(gdb) up 
#2 0x0000000100003938 in main (argc=1261, argv=0x851d5fbff533) at mafPairwiseSyntenyDecay.cpp:301 
(gdb) 

如果你没有注意到我发帖的时间——现在大约是凌晨 2:30……我已经在这个问题上折腾了大约 10 个小时。非常感谢您抽出宝贵时间查看并帮助我!下面是代码,以及一些用于重现我所遇情况的说明。

如果你有兴趣下载并安装全部依赖来运行整个程序,请先下载我的 KentLib 库,在其根目录下运行 make,然后进入 examples/mafPairwiseSyntenyDecay 目录并在那里运行 make。一个会触发我所说的这个 bug 的示例(文件相当大)是这里提供的 gzip 压缩文件:100Mb file that the program crashes on。然后用以下参数执行程序:-speciesMain=allMis1 -speciesOther=anoCar2 anoCar2.allMis1.rbest.maf.gz

/** 
* mafPairwiseSyntenyDecay 
* Author: John St. John 
* Date: 4/26/2012 
* 
* calculates the mean synteny decay in different range bins 
* 
* 
*/ 

//Kent source C imports 
extern "C" { 

#include "common.h" 
#include "options.h" 
#include "maf.h" 

} 

#include <map> 
#include <string> 
#include <set> 
#include <vector> 
#include <sstream> 
#include <iostream> 

//#define NDEBUG 
#include <assert.h> 

using namespace std; 


/* 
Global variables 
*/ 

/**
 * Plain value record describing one pairwise alignment: an interval on the
 * main species together with the matching interval on the other species.
 * All members are public and are filled in directly by the constructors.
 */
class PairAlnInfo {
public:
    string oname;  // name of the other-species sequence this record points at
    int sstart;    // start of the interval on the main species
    int send;      // end of the interval on the main species
    int ostart;    // start of the interval on the other species
    int oend;      // end of the interval on the other species
    char strand;   // orientation flag supplied by the caller

    /** Placeholder record: name "DUMMY", every coordinate and the strand set to -1. */
    PairAlnInfo()
        : oname("DUMMY"),
          sstart(-1), send(-1),
          ostart(-1), oend(-1),
          strand(-1)
    {}

    /** Fully specified record; each argument is copied into the corresponding member. */
    PairAlnInfo(string otherName,
                int mainStart, int mainEnd,
                int otherStart, int otherEnd,
                char relStrand)
        : oname(otherName),
          sstart(mainStart), send(mainEnd),
          ostart(otherStart), oend(otherEnd),
          strand(relStrand)
    {}
};

/**
 * Appends the delim-separated fields of s to elems and returns elems.
 *
 * Field rules: an empty s appends nothing; adjacent delimiters produce
 * empty fields; a single trailing delimiter does NOT produce a trailing
 * empty field ("a." yields just "a").
 */
vector<string> &split(const string &s, char delim, vector<string> &elems) {
    string::size_type fieldStart = 0;
    // Stop as soon as the cursor reaches the end, so a trailing delimiter
    // does not add an empty last field.
    while (fieldStart < s.size()) {
        const string::size_type cut = s.find(delim, fieldStart);
        if (cut == string::npos) {
            elems.push_back(s.substr(fieldStart));
            break;
        }
        elems.push_back(s.substr(fieldStart, cut - fieldStart));
        fieldStart = cut + 1;
    }
    return elems;
}


/**
 * Convenience overload: splits s on delim and returns the fields in a new
 * vector, using the three-argument split() to do the work.
 */
vector<string> split(const string &s, char delim) {
    vector<string> pieces;
    split(s, delim, pieces);
    return pieces;
}

// Fallback thresholds used by main() when -minAlnLen / -minScore are not given.
#define DEF_MIN_LEN (200) 
#define DEF_MIN_SCORE (200) 

// Alignment records keyed by an int position; std::map keeps them ordered by that key.
typedef map<int,PairAlnInfo> PairAlnInfoByPos; 
// Outer level: one position-keyed map per sequence name.
typedef map<string, PairAlnInfoByPos > ChromToPairAlnInfoByPos; 
// Global store: filled while the MAF files are read, then read back by main().
ChromToPairAlnInfoByPos pairAlnInfoByPosByChrom; 


void usage()
/* Hand the command-line help text to errAbort (explain usage and exit). */
{
    // The text contains no '%' characters, so it is passed as the message as-is.
    static const char helpText[] =
        "mafPairwiseSyntenyDecay -- Calculates pairwise syntenic decay from maf alignment containing at least the two specified species.\n"
        "usage:\n"
        "\tmafPairwiseSyntenyDecay [options] [*required options] file1.maf[.gz] ... \n"
        "Options:\n"
        "\t-help\tPrints this message.\n"
        "\t-minScore=NUM\tMinimum MAF alignment score to consider (default 200)\n"
        "\t-minAlnLen=NUM\tMinimum MAF alignment block length to consider (default 200)\n"
        "\t-speciesMain=NAME\t*Name of the main species (exactly as it appears before the '.') in the maf file (REQUIRED)\n"
        "\t-speciesOther=NAME\t*Name of the other species (exactly as it appears before the '.') in the maf file (REQUIRED)\n";

    errAbort((char*)helpText);
}//end usage()


// Table of accepted command-line options, passed to optionInit() in main().
// Each row pairs an option name with its value type; the {NULL, 0} row ends the table.
// NOTE(review): "help" is declared OPTION_STRING but main() only tests it with
// optionExists() and the usage text shows it as a bare flag -- confirm whether a
// boolean option type would be the better fit.
static struct optionSpec options[] = { 
    /* Structure holding command line options */ 
    {(char*)"help",OPTION_STRING}, 
    {(char*)"minScore",OPTION_INT}, 
    {(char*)"minAlnLen",OPTION_INT}, 
    {(char*)"speciesMain",OPTION_STRING}, 
    {(char*)"speciesOther",OPTION_STRING}, 
    {NULL, 0} 
}; //end options() 

/** 
* Main function, takes filenames for paired qseq reads 
* and outputs three files. 
*/ 
int iterateOverAlignmentBlocksAndStorePairInfo(char *fileName, const int minScore, const int minAlnLen, const string speciesMain, const string speciesOther){ 
    struct mafFile * mFile = mafOpen(fileName); 
    struct mafAli * mAli; 

    //loop over alignment blocks 
    while((mAli = mafNext(mFile)) != NULL){ 
    struct mafComp *first = mAli->components; 
    int seqlen = mAli->textSize; 
    //First find and store set of duplicates in this block 
    set<string> seen; 
    set<string> dups; 
    if(mAli->score < minScore || seqlen < minAlnLen){ 
     //free here and pre-maturely end 
     mafAliFree(&mAli); 
     continue; 
    } 

    for(struct mafComp *item = first; item != NULL; item = item->next){ 
     string tmp(item->src); 
     string tname = split(tmp,'.')[0]; 
     if(seen.count(tname)){ 
     //seen this item 
     dups.insert(tname); 
     }else{ 
     seen.insert(tname); 
     } 
    } 
    for(struct mafComp *item1 = first; item1->next != NULL; item1 = item1->next){ 
     //stop one before the end 
     string tmp1(item1->src); 
     vector<string> nameSplit1(split(tmp1,'.')); 
     string name1(nameSplit1[0]); 
     if(dups.count(name1) || (name1 != speciesMain && name1 != speciesOther)){ 
     continue; 
     } 

     for(struct mafComp *item2 = item1->next; item2 != NULL; item2 = item2->next){ 
     string tmp2(item2->src); 
     vector<string> nameSplit2(split(tmp2,'.')); 
     string name2 = nameSplit2[0]; 
     if(dups.count(name2) || (name2 != speciesMain && name2 != speciesOther)){ 
      continue; 
     } 

     string chr1(nameSplit1[1]); 
     string chr2(nameSplit2[1]); 
     char strand; 
     if(item1->strand == item2->strand) 
      strand = '+'; 
     else 
      strand = '-'; 

     int start1,end1,start2,end2; 

     if(item1->strand == '+'){ 
      start1 = item1->start; 
      end1 = start1 + item1->size; 
     }else{ 
      end1 = item1->start; 
      start1 = end1 - item1->size; 
     } 

     if(item2->strand == '+'){ 
      start2 = item2->start; 
      end2 = start2+ item2->size; 
     }else{ 
      end2 = item2->start; 
      start2 = end2 - item2->size; 
     } 

     if(name1 == speciesMain){ 
      PairAlnInfo aln(chr2,start1,end1,start2,end2,strand); 
      pairAlnInfoByPosByChrom[chr1][start1] = aln; 
     }else{ 
      PairAlnInfo aln(chr1,start2,end2,start1,end1,strand); 
      pairAlnInfoByPosByChrom[chr2][start2] = aln; 
     } 

     } //end loop over item2 
    } //end loop over item1 
    mafAliFree(&mAli); 
    }//end loop over alignment blocks 

    mafFileFree(&mFile); 
    return(0); 
} 


int main(int argc, char *argv[]) 
/* Process command line. */ 
{ 
    optionInit(&argc, argv, options); 
    if(optionExists((char*)"help") || argc <= 1){ 
    usage(); 
    } 
    int minAlnScore = optionInt((char*)"minScore",DEF_MIN_SCORE); 
    int minAlnLen = optionInt((char*)"minAlnLen",DEF_MIN_LEN); 

    string speciesMain(optionVal((char*)"speciesMain",NULL)); 
    string speciesOther(optionVal((char*)"speciesOther",NULL)); 

    if(speciesMain.empty() || speciesOther.empty()) 
    usage(); 

    //load the relevant alignment info from the maf(s) 
    for(int i = 1; i<argc; i++){ 
    iterateOverAlignmentBlocksAndStorePairInfo(argv[i], minAlnScore, minAlnLen, speciesMain, speciesOther); 
    } 

    const int blockSize = 1000; 
    const int blockCount = 100; 

    int totalWindows[blockCount] = {0}; 
    int containBreak[blockCount] = {0}; 

    //we want the fraction of windows of each size that contain a break 
    // 


    for(ChromToPairAlnInfoByPos::iterator mainChromItter = pairAlnInfoByPosByChrom.begin(); 
     mainChromItter != pairAlnInfoByPosByChrom.end(); 
     mainChromItter++){ 
    //process the alignments shared by this chromosome 
    //note that map stores them sorted by begin position 
    vector<int> keys; 
    for(PairAlnInfoByPos::iterator posIter = mainChromItter->second.begin(); 
     posIter != mainChromItter->second.end(); 
     posIter++){ 
     keys.push_back(posIter->first); 
    } 

    for(int i = 0; i < keys.size(); i++){ 
     //first check for trivial window (ie our block) 
     PairAlnInfo pi1 = mainChromItter->second[keys[i]]; 
     assert(pi1.send > pi1.sstart); 
     assert(pi1.sstart == keys[i]); 
     int numBucketsThisWindow = (pi1.send - pi1.sstart)/blockSize; 
     for(int k = 0; k < numBucketsThisWindow && k < blockCount; k++) 
     totalWindows[k]++; 


     for(int j = i+1; j < keys.size(); j++){ 

     PairAlnInfo pi2 = mainChromItter->second[keys[j]]; 

     assert(pi2.sstart == keys[j]); 
     assert(pi2.send > pi2.sstart); 
     assert(pi2.sstart > pi1.sstart); 

     if(pi2.oname == pi1.oname){ 
      int moreToInc = (pi2.send - pi1.sstart)/blockSize; 
      for(int k = numBucketsThisWindow; k < moreToInc && k < blockCount; k++) 
      totalWindows[k]++; 
      numBucketsThisWindow = moreToInc; //so we don't double count 
     }else{ 

      int numDiscontigBuckets = (pi2.send - pi1.sstart)/blockSize; 
      for(int k = numBucketsThisWindow; k < numDiscontigBuckets && k < blockSize; k++){ 
      containBreak[k]++; 
      totalWindows[k]++; 
      } 
      numBucketsThisWindow = numDiscontigBuckets; 
     } 
     if((keys[j] - keys[i]) >= (blockSize * blockCount)){ 
      //i = j; 
      break; 
     } 
     } 
    } 
    } 



    cout << "#WindowSize\tNumContainBreak\tNumTotal\t1-(NumContainBreak/NumTotal)" << endl; 
    for(int i = 0; i < blockCount; i++){ 
    cout << (i+1)*blockSize << '\t'; 
    cout << containBreak[i] << '\t'; 
    cout << totalWindows[i] << '\t'; 
    cout << (totalWindows[i] > 0? 1.0 - (double(containBreak[i])/double(totalWindows[i])): 0) << endl; 
    } 


    return(0); 
} //end main() 
+0

看起来肯定是程序某处把堆覆盖了。可能是越界访问造成的,仅靠阅读源代码很难找到。 – 2012-04-28 09:53:03

回答

2

你看到的问题很可能是由于在实际报错位置之前的某个时刻发生了内存错误。在你发布的代码中,

一个潜在的问题是循环:

for(int k = numBucketsThisWindow; k<numDiscontigBuckets && k < blockSize; k++){ 

它使用了 blockSize 而不是正确的 blockCount,这可能导致 totalWindows[] 和 containBreak[] 两个数组同时溢出。溢出会覆盖 speciesMain 和 speciesOther 字符串以及栈上的其他任何内容,这很可能就是导致你所见错误的原因。

+0

谢谢!那样做了。 – 2012-04-28 16:43:55

4

尝试在 valgrind 下运行你的程序。它会报告可能或确定丢失(泄漏)的内存、未初始化变量的使用等问题。

+0

感谢 @Julian 关于 valgrind 的建议。我必须把另一位回答者的回答选为采纳答案,因为他确实找到了我的错误!话虽如此,我尝试在 Mac 上使用 valgrind,但如果我对文档的理解没错,用于检查栈上数组越界访问的诊断功能(`--tool=exp-sgcheck`)在 Darwin 上不受支持。我随后在 Linux 机器上运行了这个工具,想看看它将来是否能派上用场,它确实定位到了代码中的这个错误。我以后一定会使用 valgrind,谢谢! – 2012-04-28 16:56:28

+0

valgrind 很棒,不是吗?仅凭阅读你的代码就找出这个错误是非常困难的事,功劳理应归于那位回答者。 – Julian 2012-04-28 18:04:06