2014-01-27 35 views
-1

我有一个75200字的列表。我需要为每个单词提供一个“唯一”ID,并且每个ID的长度可以是3个字母或更少。我可以使用数字,字母或甚至符号,但最大长度为3.生成最大长度为3位/字母/符号的唯一ID

以下是我的代码。

import java.io.*; 
import java.util.*; 


public class HashCreator { 

    private Map completedWordMap; 
    private String [] simpleLetters = {"a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"}; 
    private String[] symbols = {"!","@","#","$","%","^","&","*","~","?"}; 
    private String indexNumber; 
    String currentlyUsingLetter, currentlyUsingSymbol; 
    private int currentlyActiveSimpleLetter = 0, currentlyActiveSymbol = 0, currentlyActiveSimpleLetter2 = 0, currentlyActiveSymbol2 = 0; 
    private boolean secondaryNumberIsHundred = false; 

    public HashCreator() 
    { 
     completedWordMap = createWordNumberingMap(); 
    } 


    private Map createWordNumberingMap() 
    { 
     int number = 0; 
     int secondaryNumber = 0; 
     int thirdinoryNumber = 0; 
     Map wordMap = new HashMap(); 
     BufferedReader br = null; 
     String str = ""; 
     boolean reset = false; 


     //First Read The File 
     File readingFile = new File("WordList/NewWordsList.txt"); 

     try 
     { 
      br = new BufferedReader(new FileReader(readingFile)); 

      while((str=br.readLine())!=null) 
      { 
       if(number<1000) //Asign numbers from 0 t0 999 
       { 
        indexNumber = String.valueOf(number); 
        wordMap.put(indexNumber, str); 
        number++; 
        System.out.println(indexNumber); 
       } 
       else // It is 1000 now. Length exceeds so find another way. 
       { 
        if(indexNumber.length()<4) 
        { 
         if(currentlyActiveSimpleLetter<simpleLetters.length) //Start using simple letter array 
         { 
          if(secondaryNumber<100) //Start combining numbers with letters. Results will look like 'a0', a1', 'a2'......'x98',x99' 
          { 
           indexNumber = simpleLetters[currentlyActiveSimpleLetter]+secondaryNumber; 
           wordMap.put(indexNumber, str); 
           secondaryNumber++; 
           System.out.println(indexNumber); 

          } 
          else 
          { 
           //If the number is 100, that means the last result is something like 'a99','b99'...'x99' 
           //Time to use a new letter and set the counter back to 0 and select the next letter 

           secondaryNumber = 0; 
           currentlyActiveSimpleLetter++; 

          } 
         } 
         else 
         { 

          if(currentlyActiveSymbol<symbols.length) //We have used the entire alphabet. Start using sybmols now. 
          { 
           if(currentlyActiveSymbol==0) //If this is the first time we are reaching this step, reset the counter to 0 
           { 
            secondaryNumber = 0; 
           } 

           if(secondaryNumber<100) 
           { 
            indexNumber = symbols[currentlyActiveSymbol]+secondaryNumber; 
            wordMap.put(indexNumber, str); 
            secondaryNumber++; 
            System.out.println(indexNumber); 

           } 
           else 
           { 
            //If the number is 100, that means the last result is something like '!99','@99'...'*99' 
            //Time to use a new letter and set the counter back to 0 and select the next symbol 
            secondaryNumber = 0; 
            currentlyActiveSymbol++; 

           } 
          } 
          else 
          { 
           //We have used entire list of numbers (0-999), entire list of letters (a0-z99) and entire set of symbols (!0 - ?99) 
           //Now we need to combine all 3 together. 


           if(thirdinoryNumber<10)//We are starting with a new 'Number' counter 
           { 
            //We again start with replacing numbers. Here the first few and last few results will look like a!0'.....'a!9' 

             indexNumber = simpleLetters[currentlyActiveSimpleLetter2]+symbols[currentlyActiveSymbol]+thirdinoryNumber; 
             wordMap.put(indexNumber, str); 
             thirdinoryNumber++; 
             System.out.println(indexNumber); 

             thirdinoryNumber++; 
           } 
           else 
           { 
            //We have used number from 0-9. Time to start replacing letters 

            if(currentlyActiveSimpleLetter2<simpleLetters.length) 
            { 
             if(currentlyActiveSimpleLetter2==0) //If this is the 'first' time we reach this point, reset the number counter. 
             { 
              thirdinoryNumber = 0; 
             } 

             if(thirdinoryNumber<10) 
             { 
              indexNumber = simpleLetters[currentlyActiveSimpleLetter2]+symbols[currentlyActiveSymbol]+thirdinoryNumber; 
              wordMap.put(indexNumber, str); 
              thirdinoryNumber++; 
              System.out.println(indexNumber); 

             } 
             else 
             { 
              thirdinoryNumber = 0; 
              currentlyActiveSimpleLetter2++; //If we are at the peek of usable numbers (0-9) reset simpleletter array position to 
                      // 0 and numbercounter to 0 

             } 
            } 
            else 
            { 
             //We have used number from 0-9. Time to start replacing symbols 

             if(currentlyActiveSymbol2<symbols.length) 
             { 
              if(currentlyActiveSymbol2==0) //If this is the 'first' time we reach this point, reset the number counter. 
              { 
               thirdinoryNumber = 0; 
              } 

              if(thirdinoryNumber<10) 
              { 
               indexNumber = simpleLetters[currentlyActiveSimpleLetter2]+symbols[currentlyActiveSymbol]+thirdinoryNumber; 
               wordMap.put(indexNumber, str); 
               thirdinoryNumber++; 
               System.out.println(indexNumber); 

              } 
              else 
              { 
               thirdinoryNumber = 0; 
               currentlyActiveSymbol2++; //If we are at the peek of usable numbers (0-9) reset symbol array position to 
                       // 0 and numbercounter to 0 

              } 
             } 
            } 
           } 

          } 

         } 
        } 
        else 
        { 
         System.out.println("Error in Somewhere. Length Exceeded"); 
        } 
       } 

      } 

      br.close(); 
      System.out.println("Completed"); 
      System.out.println(wordMap.get(0)); 

     } 
     catch(Exception e) 
     { 
      e.printStackTrace(); 
     } 
     finally 
     { 
      try 
      { 
       br.close(); 
      } 
      catch(Exception e) 
      { 
       e.printStackTrace(); 
      } 
     } 


     return wordMap; 

    } 


} 

不幸的是,这是行不通的。它打印结果,并且在结果'z99'之后它是一堆!0。下面是一小片的那样:

!0 
!0 
!0 
!0 
... 
Completed 
null 

除此之外,k99后,它已经生成的ID从10-199然后用m0开始回到正常。您可以从here找到结果文件。

如您所见,wordMap.get(0)也生成null。这里有什么问题?如果还有其他简单的方法可以生成75000个最大3位/字母/符号长度的唯一ID,我非常乐意随它一起移动。

+0

嗨Peakgen。我们鼓励将答案作为自我回答而不是作为问题编辑。你愿意编辑你的文章,将其转移到自我回答?这会为我节省一些工作。谢谢! – halfer

+0

(我现在已经自己提出了这个答案)。 – halfer

回答

1

您可以创建一个基本上将十进制数转换为您选择的基数的方法。在这里,我有46个符号,例如97336唯一序列:

private static final String[] symbols = { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h", 
     "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "!", "@", "#", "$", "%", "^", "&", 
     "*", "~", "?" }; 
public static String getSequence(final int i) { 
    return symbols[i/(symbols.length * symbols.length)] + symbols[(i/symbols.length) % symbols.length] 
      + symbols[i % symbols.length]; 
} 
+0

另一个问题是他为什么要这样做。 – Teepeemm

+0

听起来像某人可能正在制作彩虹表... – BitNinja

+0

谢谢你的回复。但是这个'int i'参数是什么?我应该通过什么号码?也许数字从0到75000? –

2

这里是发电机有足够的ID。

public class Main { 
    private char[] A; 
    void init() 
    { 
     A = new char[] { 
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 
      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 
      'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 
      'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 
      'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' 
     }; 
     System.out.println("digits = " + A.length); 

     //for (int i = 0; i < A.length; i++) 
     // System.out.print(A[i] + " "); 
     //System.out.println(); 
    } 

    public void generate(int length, String id) 
    { 
     if (length == 3) { 
      System.out.println(id); 
     } else { 
      for (int i = 0; i < A.length; i++) 
       generate(length + 1, id + A[i]); 
     } 
    } 

    public static void main(String[] args) { 
     Main test = new Main(); 
     test.init(); 
     test.generate(0, ""); 
    } 
} 

唯一ID的数量为(26 + 26 + 10)^ 3 = 62^3 = 238328.

显然,你需要适应它以满足您的特定问题。

实际上只需要43个字符,因为43^3 = 79507> 75200.

编辑:generate()方法的说明。

该方法实现了一个recursive算法来生成字符组合(键)。参数的含义如下:

  • 长度密钥的长度。
  • id存储字符的组合。

下面的图片可以帮助理解算法。

enter image description here

这类似于小数(或任何其他碱)如何形成数字。

我没有注意到的一点是,您正试图先创建长度为1的所有可能的密钥,然后是长度为2的所有可能的密钥,依此类推。我的生成器只能创建3个字符的键。可以实现修改generate()方法进行如下的行为:

public void generate(int count, String id) 
{ 
    if (count == 0) { 
     System.out.println(id); 
    } else { 
     for (int i = 0; i < A.length; i++) 
      generate(count - 1, id + A[i]); 
    } 
} 

,然后调用方法树时间:

test.generate(1, ""); 
test.generate(2, ""); 
test.generate(3, ""); 

某些键包含前导零,但不应该是因为这关键的一个问题是标识符,而不是数字。可能的密钥数量增加了长度(字母) + 长度(字母)^ 2,即我们有62 + 62^2个额外的密钥。

由于密钥的长度最多为3迭代版本可以使用for循环可以轻松实现:

public void iterative_generator() 
{ 
    for (int i = 0; i < A.length; i++) { 
     for (int j = 0; j < A.length; j++) { 
      for (int k = 0; k < A.length; k++) { 
       System.out.println("" + A[i] + A[j] + A[k]); 
      } 
     } 
    } 
} 

我觉得你的想法。

+0

谢谢你的回复。但是这是什么'(int length,String id)'参数?我应该通过那里? –

0

(发布代表问题作者)

这就是我如何根据Stack Overflow用户“Keppil”的答案编写我的代码。

import java.io.*; 
import java.util.*; 

public class HashCreator 
{ 
    private Map completedWordMap; 
    private String[]simpleLetters = {"a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"}; 
    private char[] A; 

     private static final String[] symbols = { "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h", 
     "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "!", "@", "#", "$", "%", "^", "&", 
     "*", "~", "?" }; 

    public HashCreator() 
    { 

     for(int i=0;i<75001;i++) 
     { 
      System.out.println(getSequence(i)); 
     } 
    } 




    public static String getSequence(final int i) { 
    return symbols[i/(symbols.length * symbols.length)] + symbols[(i/symbols.length) % symbols.length] 
      + symbols[i % symbols.length]; 
} 
}