2014-01-23 51 views
5

我使用 n-gram 分词器(tokenizer)在 Elasticsearch 中实现了自动建议功能。现在我想在自动建议列表中高亮显示用户输入的字符序列。为此,我使用了 Elasticsearch 提供的高亮器(highlighter),代码如下;但在输出中,整个词条都被高亮了。请问哪里出错了?(标题:在 Elasticsearch 中高亮显示部分单词)

{ 
    "query": { 
     "query_string": { 
      "query": "soft", 
      "default_field": "competency_display_name" 
     } 
    }, 
    "highlight": { 
     "pre_tags": ["<b>"], 
     "post_tags": ["</b>"], 
     "fields": { 
      "competency_display_name": {} 
     } 
    } 
} 

其结果是:

{ 
    "took": 8, 
    "timed_out": false, 
    "_shards": { 
     "total": 5, 
     "successful": 5, 
     "failed": 0 
    }, 
    "hits": { 
     "total": 1, 
     "max_score": 1, 
     "hits": [ 
     { 
      "_index": "competency_auto_suggest", 
      "_type": "competency", 
      "_id": "4", 
      "_score": 1, 
      "_source": { 
       "review": null, 
       "competency_title": "Software Development", 
       "id": 4, 
       "competency_display_name": "Software Development" 
      }, 
      "highlight": { 
       "competency_display_name": [ 
        "<b>Software Development</b>" 
       ] 
      } 
     } 
     ] 
    } 
} 

映射

"competency":{ 
    "properties": { 
     "competency_display_name":{ 
      "type":"string", 
      "index_analyzer": "index_ngram_analyzer", 
      "search_analyzer": "search_term_analyzer" 
     } 
    } 
} 

设置

"analysis": { 
    "filter": { 
     "ngram_tokenizer": { 
      "type": "nGram", 
      "min_gram": "1", 
      "max_gram": "15", 
      "token_chars": [ "letter", "digit" ] 
     } 
    }, 
    "analyzer": { 
     "index_ngram_analyzer": { 
      "type": "custom", 
      "tokenizer": "keyword", 
      "filter": [ "ngram_tokenizer", "lowercase" ] 
     }, 
     "search_term_analyzer": { 
      "type": "custom", 
      "tokenizer": "keyword", 
      "filter": "lowercase" 
     } 
    } 
} 

如何才能只高亮显示 “Soft”,而不是整个 “Software Development”?

+0

你可以贴出 'competency_display_name' 的映射吗? –

+0

“index_ngram_analyzer”的设置是什么? –

+0

“index_ngram_analyzer”的设置是什么? –

回答

8

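在这种情况下,要实现部分高亮,您应该使用 ngram tokenizer,而不是 ngram filter。原因是:token filter 生成的所有 n-gram 都沿用原始词元的偏移量(使用 keyword 分词器时即整个字段值),因此整段文本都会被高亮;而 tokenizer 会为每个 n-gram 记录各自的偏移量。另外,需要在映射中设置 "term_vector": "with_positions_offsets",以加快高亮速度。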

以下是可行的设置和映射:

"analysis": { 
    "tokenizer": { 
     "ngram_tokenizer": { 
      "type": "nGram", 
      "min_gram": "1", 
      "max_gram": "15", 
      "token_chars": [ "letter", "digit" ] 
     } 
    }, 
    "analyzer": { 
     "index_ngram_analyzer": { 
      "type": "custom", 
      "tokenizer": "ngram_tokenizer", 
      "filter": [ "lowercase" ] 
     }, 
     "search_term_analyzer": { 
      "type": "custom", 
      "tokenizer": "keyword", 
      "filter": "lowercase" 
     } 
    } 
} 

映射

"competency":{ 
    "properties": { 
     "competency_display_name":{ 
      "type":"string", 
      "index_analyzer": "index_ngram_analyzer", 
      "search_analyzer": "search_term_analyzer", 
      "term_vector":"with_positions_offsets" 
     } 
    } 
} 
+3

+1。对于 Elasticsearch 2.0,需要把 `"index_analyzer"` 改为 `"analyzer"`。有关详细信息,请参见 [2.0 中映射的破坏性变更](https://www.elastic.co/guide/en/elasticsearch/reference/current/breaking_20_mapping_changes.html#_analyzer_mappings)。 – Murta