Lucene的Apache不把我的旧索引

Indexer.java

public class Indexer { 

private IndexWriter writer; 

@SuppressWarnings("deprecation") 
public Indexer(String indexDirectoryPath) throws IOException { 
    Directory indexDirectory = FSDirectory.open(new File(indexDirectoryPath)); 
    writer = new IndexWriter(indexDirectory, new StandardAnalyzer(Version.LUCENE_36), true, 
      IndexWriter.MaxFieldLength.UNLIMITED); 
} 

public void close() throws CorruptIndexException, IOException { 
    writer.close(); 
} 

private Document getDocument(File file) throws IOException { 
    Document document = new Document(); 
    Field contentField = new Field(LuceneConstants.CONTENTS, new FileReader(file)); 
    Field fileNameField = new Field(LuceneConstants.FILE_NAME, file.getName(), Field.Store.YES, 
      Field.Index.NOT_ANALYZED); 
    Field filePathField = new Field(LuceneConstants.FILE_PATH, file.getCanonicalPath(), Field.Store.YES, 
      Field.Index.NOT_ANALYZED); 
    document.add(contentField); 
    document.add(fileNameField); 
    document.add(filePathField); 
    return document; 
} 

public void indexFile(File file) throws IOException { 
    Document document = getDocument(file); 
    writer.addDocument(document); 
} 

public int createIndex(String file) throws IOException { 
    indexFile(new File(file)); 
    return writer.numDocs(); 
}

}

Searcher.java

public class Searcher { 
IndexSearcher indexSearcher; 
QueryParser queryParser; 
Query query; 

@SuppressWarnings("deprecation") 
public Searcher(String indexDirectoryPath) throws IOException { 
    Directory indexDirectory = FSDirectory 
      .open(new File(indexDirectoryPath)); 
    indexSearcher = new IndexSearcher(indexDirectory); 
    queryParser = new QueryParser(Version.LUCENE_36, 
      LuceneConstants.CONTENTS, new StandardAnalyzer(
        Version.LUCENE_36)); 
} 

public TopDocs search(String searchQuery) throws IOException, 
     ParseException { 
    query = queryParser.parse(QueryParser.escape(searchQuery)); 
    return indexSearcher.search(query, LuceneConstants.MAX_SEARCH); 
} 

public Document getDocument(ScoreDoc scoreDoc) 
     throws CorruptIndexException, IOException { 
    return indexSearcher.doc(scoreDoc.doc); 
} 

public void close() throws IOException { 
    indexSearcher.close(); 
}

}

Lucene Constants.java

public class LuceneConstants { 
public static final String CONTENTS = "contents"; 
public static final String FILE_NAME = "filename"; 
public static final String FILE_PATH = "filepath"; 
public static final int MAX_SEARCH = 10;

}

这是我如何使用它们：

public static void main(String[] args) throws IOException, ParseException { 
    { 
     // First file 
     Indexer indexer = new Indexer("index"); 
     indexer.createIndex("f1.txt"); 
     indexer.close(); 
     Searcher searcher = new Searcher(Constante.DIR_INDEX.getValor()); 
     TopDocs hits = searcher.search("Art. 1°"); 
     for (ScoreDoc scoreDoc : hits.scoreDocs) { 
      org.apache.lucene.document.Document doc = searcher.getDocument(scoreDoc); 
      String nomeArquivo = doc.get(LuceneConstants.FILE_PATH); 
      System.out.println(nomeArquivo); 
     } 
    } 
    System.out.println("-----"); 
    { 
     // Second file 
     Indexer indexer = new Indexer("index"); 
     indexer.createIndex("f2.txt"); 
     indexer.close(); 
     Searcher searcher = new Searcher(Constante.DIR_INDEX.getValor()); 
     TopDocs hits = searcher.search("Art. 1°"); 
     for (ScoreDoc scoreDoc : hits.scoreDocs) { 
      org.apache.lucene.document.Document doc = searcher.getDocument(scoreDoc); 
      String nomeArquivo = doc.get(LuceneConstants.FILE_PATH); 
      System.out.println(nomeArquivo); 
     } 
    } 
}

这工作完全正常，直到 “//第二个文件” 行。

我索引我的第二个文件后，我无法在我的第一个文件中找到任何东西。

如果我创建一个Indexer的实例，并使用它的同一个实例来索引f1.txt和f2.txt并关闭它，那么它就像我希望的那样工作。问题是，如果我关闭我的应用程序并打开它并决定索引另一个文件，我会失去f1.txt和f2.txt。

有没有办法让Lucene在索引新文件时始终保持以前的索引？

来源

2016-05-03 Bolaum

看起来你使用的是旧版本的Lucene（3.6或更低版本），是否正确？

IndexWriter constructor的第三个参数指定是创建新索引还是打开现有索引。如果设置为true，它将覆盖现有索引（如果存在于给定目录中）。如果你想打开一个现有的索引而不覆盖它，它应该是false：

writer = new IndexWriter(indexDirectory, new StandardAnalyzer(Version.LUCENE_36), false, IndexWriter.MaxFieldLength.UNLIMITED);

来源

2016-05-03 15:40:05 femtoRgon

工作，谢谢。是的，我使用3.6.2，我会看看我是否可以使用当前版本。 – Bolaum

Lucene的Apache不把我的旧索引

回答

相关问题