2015-09-15 110 views
0

我有一个名为“lang”的字段,其中包含“en_US”、“en_GB”、“ru_RU”等值,其映射如下(需求:查询恰好包含查询词某一部分的字符串):

"lang": {
  "type": "string",
  "index": "not_analyzed",
  "fields": {
    "raw": {
      "type": "string",
      "index": "not_analyzed",
      "ignore_above": 256
    }
  }
}

如何过滤文档,例如筛选出 lang 值中包含“US”的文档?

回答

0

一种做法是去掉顶层字段上的"index": "not_analyzed",改为给该字段设置pattern analyzer。由于您已经设置了"lang.raw"字段,因此仍然可以通过它获取未经分析的原始值,用于分面(faceting)或其他任何操作。

所以,为了测试它,我创建了这样一个索引:

PUT /test_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "analyzer": {
        "whitespace_underscore": {
          "type": "pattern",
          "pattern": "[\\s_]+",
          "lowercase": false
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "name": { "type": "string" },
        "lang": {
          "type": "string",
          "index_analyzer": "whitespace_underscore",
          "search_analyzer": "standard",
          "fields": {
            "raw": {
              "type": "string",
              "index": "not_analyzed",
              "ignore_above": 256
            }
          }
        }
      }
    }
  }
}

并添加了一些文档:

POST /test_index/doc/_bulk 
{"index":{"_id":1}} 
{"name":"doc1","lang":"en_US"} 
{"index":{"_id":2}} 
{"name":"doc2","lang":"en_GB"} 
{"index":{"_id":3}} 
{"name":"doc3","lang":"ru_RU"} 

现在我可以像这样通过"US"过滤:

POST /test_index/_search
{
  "query": {
    "filtered": {
      "filter": {
        "term": { "lang": "US" }
      }
    }
  }
}
... 
{ 
    "took": 1, 
    "timed_out": false, 
    "_shards": { 
     "total": 1, 
     "successful": 1, 
     "failed": 0 
    }, 
    "hits": { 
     "total": 1, 
     "max_score": 1, 
     "hits": [ 
     { 
      "_index": "test_index", 
      "_type": "doc", 
      "_id": "1", 
      "_score": 1, 
      "_source": { 
       "name": "doc1", 
       "lang": "en_US" 
      } 
     } 
     ] 
    } 
} 

而且,我仍然可以通过"lang.raw"上的 terms 聚合(terms aggregation)获得原始值的列表:

POST /test_index/_search?search_type=count
{
  "aggs": {
    "lang_terms": {
      "terms": { "field": "lang.raw" }
    }
  }
}
... 
{ 
    "took": 2, 
    "timed_out": false, 
    "_shards": { 
     "total": 1, 
     "successful": 1, 
     "failed": 0 
    }, 
    "hits": { 
     "total": 3, 
     "max_score": 0, 
     "hits": [] 
    }, 
    "aggregations": { 
     "lang_terms": { 
     "doc_count_error_upper_bound": 0, 
     "sum_other_doc_count": 0, 
     "buckets": [ 
      { 
       "key": "en_GB", 
       "doc_count": 1 
      }, 
      { 
       "key": "en_US", 
       "doc_count": 1 
      }, 
      { 
       "key": "ru_RU", 
       "doc_count": 1 
      } 
     ] 
     } 
    } 
} 

这里是我用来测试它的代码:

http://sense.qbox.io/gist/ac3f3fd66ea649c0c3a8010241d1f6981a7e012c