2011-01-22 58 views
1

问候!将递归差分算法转换为迭代算法的建议?

我写了一个从头开始递归差异算法。它找到两个字符串之间的“最佳匹配”,以便差异最小化,并用CAPS中表示的任何差异打印出两个字符串。它“按原样”正常工作,除非它效率很低。我一直在盯着它一天半,试图找到让它迭代的方法,或者至少减少它达到的堆栈深度,但我在智慧的结尾,并希望在这里敏锐的头脑会比我更清楚地看到解决方案。

下面是代码的肉。被引用的MergePoint类只是一个简单的“链接列表”节点,它包含“原始索引”整数,“已更改索引”和“下一个”MergePoint。 MergePoint列表表示每个数组中已经“合并”的一系列索引。当链完成时,链中未表示的任何索引都是插入/删除。 NullObject对象是MergePoint的扩展,回想起来,它不是严格需要创建的,基本上可以认为是一个普通的'null'。

任何意见/建议将非常感激。

public class StringCompare 
{ 
    public static int[][]  mergeList  = new int[0][0]; 
    public static MergePoint NULL   = NullObject.getNull(); 
    public static int   maxMerged  = 0; 
    public static int   minClusterSize = -1; 

    public static void diff(String orig, String alt) 
    { 
     String[] original = orig.toUpperCase().split(" "); 
     String[] altered = alt.toUpperCase().split(" "); 

     for(int i = 0; i < altered.length; i++) 
     { 
      merge(original, altered, 0, i, NULL, NULL, 0, 0); 
     } 


     for(int i = 0; i < mergeList.length; i++) 
     { 
      or[mergeList[i][0]] = or[mergeList[i][0]].toLowerCase(); 
      al[mergeList[i][1]] = al[mergeList[i][1]].toLowerCase(); 
     } 

     printStringArray(or); 
     printStringArray(al); 
    } 

    private void printStringArray(String[] arr) 
    { 
     for(String word : arr) 
     { 
      System.out.print(word.trim() + " "); 
     } 

     System.out.println(); 
    } 

    private static void merge(String[] original, String[] altered, int indexInOriginal, int indexInAltered, MergePoint head, MergePoint tail, int listSize, int clusters) 
    { 
     if (indexInOriginal >= original.length) 
     {   
      if (listSize > 0) 
      { 
       if (((listSize == maxMerged) && (clusters < minClusterSize)) || 
        (listSize > maxMerged)) 
       { 
        storeMergePoints(head, listSize, clusters); 
       } 
      } 
     } 
     else if (indexInAltered >= altered.length) 
     { 
      if (tail != NULL) 
      { 
       merge(original, altered, (indexInOriginal + 1), (tail.indexInNew() + 1), head, tail, listSize, clusters); 
      } 
      else 
      { 
       merge(original, altered, (indexInOriginal + 1), 0, head, tail, listSize, 0); 
      } 
     } 
     else 
     { 
      if(original[indexInOriginal].equals(altered[indexInAltered])) 
      { 
       MergePoint mergePoint = new MergePoint(indexInOriginal, indexInAltered); 
       MergePoint bookMark = NULL;    
       int   newClusters = clusters; 

       if (indexInOriginal != (tail.indexInOriginal() + 1)) 
       { 
        newClusters++; 
       } 

       if (indexInAltered != (tail.indexInNew() + 1)) 
       { 
        newClusters++; 
       } 

       if (head == NULL) 
       { 
        head = mergePoint; 
        tail = head; 
       } 
       else 
       { 
        tail.setNext(mergePoint); 

        bookMark = tail;     
        tail  = tail.next(); 
       } 

       merge(original, altered, (indexInOriginal + 1), (indexInAltered + 1), head, tail, (listSize + 1), newClusters); 

       if (bookMark == NULL) 
       { 
        merge(original, altered, indexInOriginal, (indexInAltered + 1), NULL, NULL, 0, 0); 
       } 
       else 
       { 
        bookMark.setNext(NULL); 

        merge(original, altered, indexInOriginal, (indexInAltered + 1), head, bookMark, listSize, newClusters); 
       } 
      } 
      else 
      { 
       merge(original, altered, indexInOriginal, (indexInAltered + 1), head, tail, listSize, clusters); 
      } 
     } 
    } 

    public static void storeMergePoints(MergePoint current, int size, int clusters) 
    {  
     mergeList  = new int[size][2]; 
     maxMerged  = size; 
     minClusterSize = clusters; 

     for(int i = 0; i < size; i++) 
     { 
      mergeList[i][0] = current.indexInOriginal(); 
      mergeList[i][1] = current.indexInNew(); 
      current   = current.next(); 
     }  
    } 
} 
+1

我不相信只要把它变成一个迭代解决方案就能解决你的性能问题。可能值得看看这种现有算法的性能表现:http://en.wikipedia.org/wiki/Longest_common_subsequence_problem – 2011-01-22 08:23:51

回答

4

对于迭代你可以考虑用自己控制的堆栈对象替换JVM堆更换递归:java.util.Stack中的将是非常适合的。只需在每次迭代中将()和pop()数据推送到该堆栈上,而不是让方法调用它自己。