2012-01-23 111 views
4

给定一组字符串(大集合)和一个输入字符串,您需要高效地查找输入字符串的所有字母。你会使用什么数据结构。使用它,你将如何找到字形?我想到的在一组字符串中查找输入字符串..?

事情是这些:

  1. 使用地图

    一)消除了比输入更多/更少字母的所有单词。

    二)把输入的字符在地图

    C)遍历地图每个字符串,看看是否所有字母都存在与他们的计数。

  2. 使用尝试次数

    一)把具有字符正确的号码为线索的所有字符串。

    b)如果输入中包含字母,则遍历每个分支并进一步深入。

    c)若叶达到这个词是字谜

任何人都可以找到一个更好的解决方案?

在上述方法中是否有任何问题?

+0

比方说,你有一个字谜的候选人。您可以尝试对输入字符串和此字符串进行排序 - 排序后它们应该是相同的。你有没有考虑过这种方法? – user998692

+0

排序会给我额外的时间消耗。而我的上述方法是线性的,无需排序 –

+0

假设平均字长在范围[3,20]字符中......当对一个字进行排序时,您的数据比较有限。此外,一旦使用散列表预处理了整个字典,则每个后续对getAnagrams的调用都将是O(1),而在trie方法中不是这样。 –

回答

5

从每个单词构建一个频率图并比较这些图。

伪代码:

class Word 

    string word 
    map<char, int> frequency 

    Word(string w) 
    word = w 
    for char in word 
     int count = frequency.get(char) 
     if count == null 
     count = 0 
     count++ 
     frequency.put(char, count) 

    boolean is_anagram_of(that) 
    return this.frequency == that.frequency 
+0

听起来不错!但它比trie方法更快吗?此外,由于某些字母会很常见,因此字典会使用较少的内存。 –

+0

@KshitijBanerjee为了工作,你需要对这些字符进行排序并且从那些已排序的字符中构建字典句柄(你会如何确定“mary”和“army”是字谜)?对字词排序需要'O(n * log(n))',同时构建一个基于散列的映射将采用'O(n)'。 –

+0

为什么我会排序来制作特里?考虑玛丽作为输入词和包含军队的列表。我在mary上创建一个hashmap。现在在列表中的所有单词上建立一个trie。并遍历每个分支。当分支是军队时,我会顺利地到达叶子,并且我有一场比赛。不需要排序..对吗? –

4

你可以建立其中关键的排序的的HashMap(字),并将该值是,排序的所有单词的列表,给予相应的键:

private Map<String, List<String>> anagrams = new HashMap<String, List<String>>(); 

void buildIndex(){ 
    for(String word : words){ 
     String sortedWord = sortWord(word); 
     if(!anagrams.containsKey(sortedWord)){ 
      anagrams.put(sortedWord, new ArrayList<String>()); 
     } 
     anagrams.get(sortedWord).add(word); 
    } 
} 

然后,您只需在刚刚构建的散列映射中查找已排序的单词,即可得到所有字母表的列表。

0
import java.util.HashMap; 
import java.util.HashSet; 
import java.util.Map; 
import java.util.Set; 
/* 
*Program for Find Anagrams from Given A string of Arrays. 
* 
*Program's Maximum Time Complexity is O(n) + O(klogk), here k is the length of word. 
* 
* By removal of Sorting, Program's Complexity is O(n) 
* **/ 
public class FindAnagramsOptimized { 
    public static void main(String[] args) { 
     String[] words = { "gOd", "doG", "doll", "llod", "lold", "life", 
"sandesh", "101", "011", "110" }; 
     System.out.println(getAnaGram(words)); 
    } 
    // Space Complexity O(n) 
    // Time Complexity O(nLogn) 
    static Set<String> getAnaGram(String[] allWords) { 
     // Internal Data Structure for Keeping the Values 
     class OriginalOccurence { 
      int occurence; 
      int index; 
     } 
     Map<String, OriginalOccurence> mapOfOccurence = new HashMap<>(); 
     int count = 0; 
     // Loop Time Complexity is O(n) 
    // Space Complexity O(K+2K), here K is unique words after sorting on a 

    for (String word : allWords) { 
     String key = sortedWord(word); 

     if (key == null) { 
      continue; 
     } 
     if (!mapOfOccurence.containsKey(key)) { 
      OriginalOccurence original = new OriginalOccurence(); 
      original.index = count; 
      original.occurence = 1; 
      mapOfOccurence.put(key, original); 
     } else { 
      OriginalOccurence tempVar = mapOfOccurence.get(key); 
      tempVar.occurence += 1; 
      mapOfOccurence.put(key, tempVar); 
     } 
     count++; 
    } 

    Set<String> finalAnagrams = new HashSet<>(); 

    // Loop works in O(K), here K is unique words after sorting on 
    // characters 
    for (Map.Entry<String, OriginalOccurence> anaGramedWordList : mapOfOccurence.entrySet()) { 
     if (anaGramedWordList.getValue().occurence > 1) { 
      finalAnagrams.add(allWords[anaGramedWordList.getValue().index]); 
     } 
    } 

    return finalAnagrams; 
} 

// Array Sort works in O(nLogn) 
// Customized Sorting for only chracter's works in O(n) time. 
private static String sortedWord(String word) { 

    // int[] asciiArray = new int[word.length()]; 
    int[] asciiArrayOf26 = new int[26]; 
    // char[] lowerCaseCharacterArray = new char[word.length()]; 
    // int characterSequence = 0; 
    // Ignore Case Logic written in lower level 
    for (char character : word.toCharArray()) { 
     if (character >= 97 && character <= 122) { 
      // asciiArray[characterSequence] = character; 
      if (asciiArrayOf26[character - 97] != 0) { 
       asciiArrayOf26[character - 97] += 1; 
      } else { 
       asciiArrayOf26[character - 97] = 1; 
      } 
     } else if (character >= 65 && character <= 90) { 
      // asciiArray[characterSequence] = character + 32; 
      if (asciiArrayOf26[character + 32 - 97] != 0) { 
       asciiArrayOf26[character + 32 - 97] += 1; 
      } else { 
       asciiArrayOf26[character + 32 - 97] = 1; 
      } 
     } else { 
      return null; 
     } 

     // lowerCaseCharacterArray[characterSequence] = (char) 
     // asciiArray[characterSequence]; 
     // characterSequence++; 
    } 
    // Arrays.sort(lowerCaseCharacterArray); 

    StringBuilder sortedWord = new StringBuilder(); 
    int asciiToIndex = 0; 
    // This Logic uses for reading the occurrences from array and copying 
    // back into the character array 
    for (int asciiValueOfCharacter : asciiArrayOf26) { 
     if (asciiValueOfCharacter != 0) { 
      if (asciiValueOfCharacter == 1) { 
       sortedWord.append((char) (asciiToIndex + 97)); 
      } else { 
       for (int i = 0; i < asciiValueOfCharacter; i++) { 
        sortedWord.append((char) (asciiToIndex + 97)); 
       } 
      } 
     } 
     asciiToIndex++; 
    } 
    // return new String(lowerCaseCharacterArray); 
    return sortedWord.toString(); 
} 
}