2012-05-31 44 views
1

【使用 Lucene 的增量搜索】我想使用 Lucene 实现增量搜索。我的搜索词由两个单词组成,中间有空格,比如“今天的印度”。我的搜索查询返回了以下结果:

  • 印度今天
  • 如今,印度时间
  • 今天时间印度

我希望搜索结果类似于 SQL 中的 LIKE '今天的印度%'(即以该短语开头的前缀匹配),但实际并非如此。我尝试过使用短语查询(PhraseQuery),但它只适用于精确匹配。我的数据以 NOT_ANALYZED 方式存储,这样才能搜索包含空格的内容。

// Fragment from the question: runs an exact-match search against the "name" field.
// The analyzer is created here but is not referenced again within this fragment.
KeywordAnalyzer analyzer = new KeywordAnalyzer(); 

// A PhraseQuery holding a single term whose text is the complete value, spaces included.
// Per the question the data is stored NOT_ANALYZED, so this only finds exact matches,
// not values that merely start with this text.
PhraseQuery pq = new PhraseQuery(); 
pq.add(new Term("name", "MR DANIEL KELLEHER")); 


// Upper bound on the number of hits handed to the collector.
int hitsPerPage = 1000; 
// NOTE(review): 'index' is declared outside this fragment — presumably the Directory
// that holds the index; confirm against the full source.
IndexReader reader = IndexReader.open(index); 
IndexSearcher searcher = new IndexSearcher(reader); 
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); 
// Results are gathered into 'collector'; reading them back is not shown here.
searcher.search(pq, collector); 

我无法构造出这种中间带空格的查询。我查阅了网上的许多文章以及 Stack Overflow,但都没有找到解决办法。

package org.lucenesample; 

import org.apache.lucene.search.Query; 

import org.apache.lucene.*; 
import org.apache.lucene.analysis.*; 
import org.apache.lucene.analysis.standard.*; 
import org.apache.lucene.analysis.standard.std31.*; 
import org.apache.lucene.analysis.tokenattributes.*; 
import org.apache.lucene.collation.*; 
import org.apache.lucene.document.*; 
import org.apache.lucene.document.Field.Index; 
import org.apache.lucene.document.Field.Store; 
import org.apache.lucene.index.*; 
import org.apache.lucene.index.IndexWriter.MaxFieldLength; 
import org.apache.lucene.messages.*; 
import org.apache.lucene.queryParser.*; 
import org.apache.lucene.search.*; 
import org.apache.lucene.search.function.*; 
import org.apache.lucene.search.payloads.*; 
import org.apache.lucene.search.spans.*; 
import org.apache.lucene.store.*; 
import org.apache.lucene.util.*; 
import org.apache.lucene.util.fst.*; 
import org.apache.lucene.util.packed.*; 

import java.io.File; 
import java.sql.*; 
import java.util.HashMap; 

public class ExactPhrasesearchUsingStandardAnalyser { 

    /** 
    * @param args 
    */ 
    public static void main(String[] args) throws Exception { 
     Directory directory = new RAMDirectory(); 
     StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); 
     MaxFieldLength mlf = MaxFieldLength.UNLIMITED; 
     IndexWriter writer = new IndexWriter(directory, analyzer, true, mlf); 
     writer.addDocument(createDocument1("1", "foo bar baz blue")); 
     writer.addDocument(createDocument1("2", "red green blue")); 
     writer.addDocument(createDocument1("3", "test panda foo & bar testt")); 
     writer.addDocument(createDocument1("4", " bar test test foo in panda red blue ")); 
     writer.addDocument(createDocument1("4", "test")); 
     writer.close(); 

    IndexSearcher searcher = new IndexSearcher(directory); 
    PhraseQuery query = new PhraseQuery(); 


    QueryParser qp2 = new QueryParser(Version.LUCENE_35, "contents", analyzer); 
    //qp.setDefaultOperator(QueryParser.Operator.AND); 
    Query queryx2 =qp2.parse("test foo in panda re*");//contains query 
    Query queryx23 =qp2.parse("+red +green +blu*" );//exact phrase match query.Make last word as followed by star 
    Query queryx234 =qp2.parse("(+red +green +blu*)& (\"red* green\") " ); 





    /*String term = "new york"; 
     // id and location are the fields in which i want to search the "term" 
     MultiFieldQueryParser queryParser = new MultiFieldQueryParser(
              Version.LUCENE_35, 
              { "contents"}, 
              new KeywordAnalyzer()); 
     Query query = queryParser.parse(term); 
     System.out.println(query.toString());*/ 

    QueryParser qp = new QueryParser(Version.LUCENE_35, "contents", analyzer); 
    //qp.setDefaultOperator(QueryParser.Operator.AND); 

    Query queryx =qp.parse("\"air quality\"~10"); 
    System.out.println("******************Searching Code starts******************"); 
    TopDocs topDocs = searcher.search(queryx2, 10); 
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) { 
     Document doc = searcher.doc(scoreDoc.doc); 
     System.out.println(doc+"testtttttttt"); 
    } 

} 



    private static Document createDocument1(String id, String content) { 
     Document doc = new Document(); 
     doc.add(new Field("id", id, Store.YES, Index.NOT_ANALYZED)); 
     doc.add(new Field("contents", content, Store.YES, Index.ANALYZED, 
       Field.TermVector.WITH_POSITIONS_OFFSETS)); 
     System.out.println(content); 
     return doc; 
    } 
} 

我尝试了上面这种方式,可以实现“包含”式的搜索。但我无法实现“以……开头”的搜索,也就是当用户输入“印度”时,“印度的明天”和“今日印度”这样的结果也能出现。当我使用 "+india* +to*" 时,结果已经比较接近了,但它同样会返回“今天的印度人”这类结果。而且在用户把“今天”完整输入之前,我得不到想要的搜索结果。基本上,我希望短语查询 "\"今日印度\"" 能够正常工作。

+0

是前缀查询你想要什么? – Kevin

+0

前缀查询也不支持两者之间的空间。我也尝试过。 –

回答

1

对于经过分析(analyzed)的字段,一种方法是使用 MultiPhraseQuery,并事先枚举出所有匹配该前缀的词项(prefix terms):

<MultiPhraseQuery: field:"india (today todays)"> 

或者可以使用 SpanQuery,其好处是可以自动处理词项扩展(term expansion)。

<SpanNearQuery: spanNear([field:india, SpanMultiTermQueryWrapper(field:today*)], 0, true)> 
+0

你好 Cody,能给出这个例子的代码吗?我不清楚该如何使用 SpanMultiTermQueryWrapper。 –

+0

普通的 Lucene 3.5 中不存在 SpanMultiTermQueryWrapper。还有什么其他办法吗? –

+0

它应该存在于 Java 版 Lucene 3.5 中。以下是文档中的示例:`WildcardQuery wildcard = new WildcardQuery(new Term("field", "bro?n")); SpanQuery spanWildcard = new SpanMultiTermQueryWrapper<WildcardQuery>(wildcard);` –