我有一个简单的自定义分析器,似乎正确地在SQL服务器的索引中生成语音哈希。它似乎大多数尝试查询使用我的自定义分析器生成的索引不返回结果。我一直无法找到类似的情况,所以我一定会做错的。用自定义分析器/过滤器搜索返回没有结果
自定义过滤器:
internal class SoundexFilter : TokenFilter
{
private readonly ITermAttribute _termAttr;
private Queue<Token> soundexTokenQueue
= new Queue<Token>();
public SoundexFilter(TokenStream input)
: base(input)
{
_termAttr = AddAttribute<ITermAttribute>();
}
public override bool IncrementToken()
{
if (input.IncrementToken())
{
string currentTerm = _termAttr.Term;
var hash = Soundex.For(currentTerm);
Console.WriteLine("Original: {0}, Hash: {1}", currentTerm, hash);
soundexTokenQueue.Enqueue(new Token(hash, 0, hash.Length));
return true;
}
else if (soundexTokenQueue.Count > 0)
{
var token = soundexTokenQueue.Dequeue();
_termAttr.SetTermBuffer(token.Term);
_termAttr.SetTermLength(token.TermLength());
return true;
}
return false;
}
}
定制分析器:
public class SoundexAnalyzer : Analyzer
{
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
//create the tokenizer
TokenStream result = new StandardTokenizer(Version.LUCENE_30, reader);
//add in filters
result = new StandardFilter(result);
// Add soundex filter
result = new SoundexFilter(result);
return result;
}
}
简单的测试程序:
public class Program
{
private const string NAME = "John Smith";
private const string SEARCH_NAME = "John Smith";
private Analyzer _analyzer = new SoundexAnalyzer();
private Directory _directory = new RAMDirectory();
internal void Run(string[] args)
{
using (var writer = new IndexWriter(_directory, _analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
{
var field = new Field("Name", NAME, Field.Store.YES, Field.Index.ANALYZED);
var document = new Document();
document.Add(field);
writer.AddDocument(document);
// Unnecessary but helps imply intent
writer.Commit();
}
using (var searcher = new IndexSearcher(_directory))
{
var parser = new QueryParser(Version.LUCENE_30, "Name", _analyzer);
var query = parser.Parse(SEARCH_NAME);
var docs = searcher.Search(query, 10);
Console.WriteLine("\nReturned Docs:");
foreach (var scoreDoc in docs.ScoreDocs)
{
var doc = searcher.Doc(scoreDoc.Doc);
Console.WriteLine(doc.Get("Name"));
}
}
}
private static void Main(string[] args)
{
new Program().Run(args);
}
}
该成功使用此代码是唯一的搜索完全匹配像NAME = "John"
和SEARCH_NAME = "John"
。
奇怪的是,在Luke中搜索标准分析器的语音哈希工作正常,因此写入必须按预期工作(或者至少是我的期望)。
我已经做了相当多的研究,没有什么帮助。任何想法我失踪?