2015-04-30 71 views
2

我正在索引文本文件,我想按字母顺序打印出文本文件中的每个单词和页码。进出口运行与字母排序,虽然一个问题...这里是我目前有...Java按字母顺序排序

public void addWord(String word, int num) { 
    boolean match = false; 
    for (IndexEntry x : this) { 
     String i = x.getWord(); 
     if (i.toUpperCase().equals(word.toUpperCase())) { 
      x.add(num); 
      match = true; 
     } 
    } 
    if (match == false) { 
     IndexEntry entry = new IndexEntry(word); 
     int add = 0; 
     int count = 0; 
     boolean spot = false; 
     while (count < this.size() && !spot) { 
      String str = this.get(count).getWord(); 
      if (str.compareTo(word) > 0) { 
       add = count; 
       spot = true; 
      } 
      count++; 
     } 
     this.add(add, entry); 
     this.get(indexOf(entry)).add(num); 
    } 
} 

和这是输出....

BLUE[5, 8] 
BLACK[7] 
NEW[11] 
OLD[10] 
RED[4] 
TWO[2] 
FISH[1, 2, 4, 5, 7, 8, 10, 11] 
ONE[1] 
Done. 

这显然不是按字母顺序...任何帮助,将不胜感激。谢谢。

这里是indexEntry

import java.util.List; 
import java.util.ArrayList; 

public class IndexEntry implements Comparable<IndexEntry> 
{ 
    private String word; 
    private List<Integer> numsList; // contains Integer objects 

    /** 
    * Constructs an IndexEntry for a given word 
    * (converted to upper case); stores the word and 
    * creates an empty ArrayList<Integer> for numsList 
    * @param aWord the word for this entry 
    */ 
    public IndexEntry(String aWord) 
    { 
    word = aWord.toUpperCase(); 
    numsList = new ArrayList<Integer>(); 
    } 

    /** 
    * Returns word of this IndexEntry object 
    * @return this entry's word 
    */ 
    public String getWord() 
    { 
    return word; 
    } 

    /** 
    * Adds num at the end of this IndexEntry's numsList if 
    * num is not already in the list; otherwise makes no changes. 
    */ 
    public void add(int num) 
    { 
    if(numsList.contains(num) == false) 
     numsList.add(num); 
    } 

    /** 
    * Compares this entry for equality to another IndexEntry; 
    * the entries are considered equal if their words are 
    * the same 
    * @param obj the other IndexEntry to be compared 
    * @return true if the words match, otherwise false 
    */ 
    public boolean equals(IndexEntry obj) 
    { 
    if(word.equals(obj.getWord())) 
     return true; 
    return false; 
    } 

    /** 
    * Compares this entry to another IndexEntry 
    * by comparing their words 
    * @param obj the other IndexEntry to be compared 
    * @return negative if 'this' entry smaller, 0 if equal, positive is 'this' larger 
    */ 
    public int compareTo(IndexEntry obj) 
    { 
    return obj.getWord().compareTo(word); 
    } 

    /** 
    * Converts this IndexEntry into a string 
    * @return the String representation of this entry: word and line numbers 
    */ 
    public String toString() 
    { 
    return word + numsList; 
    } 
} 

和documentIndex包含addWord

import java.util.StringTokenizer; 

public class DocumentIndex extends java.util.ArrayList<IndexEntry> 
{ 

    /** 
    * Creates an empty DocumentIndex with the default 
    * initial capacity 
    */ 
    public DocumentIndex() 
    { 
    super(); 
    } 

    /** 
    * Creates an empty DocumentIndex with the capacity 
    * given by the parameter 
    * @param init the initial capacity of the list 
    */ 
    public DocumentIndex(int init) 
    { 
    super(init); 
    } 

/** 
    * If word is in this DocumentIndex and num is in its list, does nothing; 
    * if word is in this DocumentIndex and num is not in its list, adds num 
    * to this word's IndexEntry; otherwise creates a new entry with word and 
    * num and inserts it into this index in order 
    * @param word the word to look for 
    * @param num the line number this word is on 
    */ 

    public void addWord(String word, int num) 
    { 
     boolean match = false; 
     for (IndexEntry x : this){ 
      String i = x.getWord(); 
      if (i.toUpperCase().equals(word.toUpperCase())){ 
       x.add(num); 
       match = true;}} 
     if (match == false){ 
      IndexEntry entry = new IndexEntry(word); 
      int add = 0; 
      int count = 0; 
      boolean spot = false; 
      while (count < this.size() && !spot){ 
       String str = this.get(count).getWord(); 
       if (str.compareTo(word) > 0){ 
        add = count; 
        spot = true;} 
       count++;} 
      this.add(add, entry); 
      this.get(indexOf(entry)).add(num);} 
    } 

    /** 
    * For each word found in str, calls addWord(word, num) 
    * @param str a line of text 
    * @param num the line number for this line of text 
    */ 
    public void addAllWords(String str, int num) 
    { 
    StringTokenizer tokens = new StringTokenizer(str, " .,-;?!"); 
      // " .,-;?!" lists delimeters that separate words 

    while(tokens.hasMoreTokens()) 
    { 
     String word = tokens.nextToken(); 
     addWord(word, num); 
    } 
    } 
} 
+3

不在于它解决了什么,但'i.toUpperCase()。等于(word.toUpperCase())'可以改写为'i.equalsIgnoreCase(字)'。 – Pshemo

+0

@ user3808597请更新您的OP中的代码,以便来自外部的任何人都需要回答您的问题。 –

回答

1

编辑:您需要while循环之后添加以下行addWord

if (!spot && (count == this.size())){ 
    add = count; 
} 

这正好解决当我试图在我结束的错误。

另外,我觉得下面的版本是写addWord()方法的更清洁,更高效的方式:

public void addWord(String word, int num) { 
    String upperCaseWord = word.toUpperCase(); 

    for (IndexEntry x : this) { 
     String i = x.getWord(); 
     if (i.equals(upperCaseWord)){ 
      x.add(num); 
      return; 
     } 
    } 

    IndexEntry entry = new IndexEntry(word); 
    entry.add(num); 

    int currSize = this.size();  
    if (currSize == 0) { 
     this.add(entry); 
     return; 
    } 

    int count = 0; 
    while (count < currSize) { 
     String str = this.get(count).getWord(); 
     if (str.compareTo(upperCaseWord) > 0){ 
      break; 
     } 

     count++; 
    } 

    this.add(count, entry); 
} 
+0

这些地方我的结果,仍然不是按字母顺序http://pastebin.com/bw2FgBKx – user3808597

+0

你可以发布你用来打印出结果的代码吗? – jithinpt

+0

它是在indexEntry中的toString方法 – user3808597

0

Java的字符串通常根据排序的Unicode代码点的数字顺序,这是不是你想要的东西上。使用Collator来按字母排序。

0

我相信这个问题是当字母DocumentIndex正在形成,它可能不知道在什么地方新的词。


例如这里。

IndexEntry entry = new IndexEntry(word); 
int add = 0; 
int count = 0; 
boolean spot = false; 
while (count < this.size() && !spot) 
{ 
    String str = this.get(count).getWord(); 
    if (str.compareTo(word) > 0) 
    { 
     add = count; 
     spot = true; 
    } 

    count++; 
} 

this.add(add, entry); 
this.get(indexOf(entry)).add(num); 

如果循环结束而没有找到索引来放置字符串字会发生什么?您的代码状态如果字符串单词在字典顺序上不是字符串小于最终的String str,则字符串单词将简单地添加到DocumentIndex的开头(索引0)。因此,我认为这会在排序算法中产生一个问题,即在最后添加单词时在列表的开头添加单词。


我相信你需要添加一个条件,以检查是否需要添加到DocumentIndex结束串词。

IndexEntry entry = new IndexEntry(word); 
int add = 0; 
int count = 0; 
boolean spot = false; 
while (count < this.size() && !spot) 
{ 
    String str = this.get(count).getWord(); 
    if (str.compareTo(word) > 0) 
    { 
     add = count; 
     spot = true; 
    } 

    count++; 
} 

if (spot) // If the loop "spotted" an index, lets add it to the ArrayList. 
{ 
    this.add(add, entry); 
} 
else // Otherwise, lets add it to the end of the ArrayList. 
{ 
    this.add(entry); 
} 

this.get(indexOf(entry)).add(num);