2016-12-05 33 views
0

我有一个字段("keywords")使用了自动完成分析器。该字段是一个字符串数组。当我用查询字符串进行查询时,希望首先显示数组 keywords 中单个元素匹配度最高的文档。问题是,如果查询字符串的一部分与数组 keywords 中更多的元素相匹配,那么该文档会排在另一个匹配元素较少但匹配得更好的文档之前。如何在 ElasticSearch 中以数组元素的最大相关匹配作为得分?举例来说,如果我用"gas station"进行查询,返回文档的关键字如下:

"hits": [ 
    { 
    "_index": "locali_v3", 
    "_type": "categories", 
    "_id": "5810767ddc536a03b4761acd", 
    "_score": 3.1974547, 
    "_source": { 
     "keywords": [ 
     "Radio Station", 
     "Radio Station" 
     ] 
    } 
    }, 
    { 
    "_index": "locali_v3", 
    "_type": "categories", 
    "_id": "581076d8dc536a03b4761cc3", 
    "_score": 3.0407648, 
    "_source": { 
     "keywords": [ 
     "Stationery Store", 
     "Stationery Store" 
     ] 
    } 
    }, 
    { 
    "_index": "locali_v3", 
    "_type": "categories", 
    "_id": "5810767ddc536a03b4761ace", 
    "_score": 2.903595, 
    "_source": { 
     "keywords": [ 
     "TV Station", 
     "TV Station" 
     ] 
    } 
    }, 
    { 
    "_index": "locali_v3", 
    "_type": "categories", 
    "_id": "581076cddc536a03b4761c87", 
    "_score": 2.517158, 
    "_source": { 
     "keywords": [ 
     "Praktoreio Ugrwn Kausimwn/Gkaraz", 
     "Praktoreio Ygrwn Kaysimwn/Gkaraz", 
     "Praktoreio Ugron Kausimon/Gkaraz", 
     "Praktoreio Ygron Kaysimon/Gkaraz", 
     "Πρακτορείο Υγρών Καυσίμων/Γκαράζ", 
     "Gas Station" 
     ] 
    } 
} 

包含"Gas Station"的文档排在第四位,尽管它拥有最佳的单元素匹配。有没有办法告诉 ElasticSearch 我不关心"gas"或"station"在关键字中出现了多少次?我希望以数组 keywords 中匹配度最高的那个元素作为评分因子。

我的设置是:

{ 
    "locali": { 
    "settings": { 
     "index": { 
     "creation_date": "1480937810266", 
    "analysis": { 
     "filter": { 
     "autocomplete_filter": { 
      "type": "edge_ngram", 
      "min_gram": "1", 
      "max_gram": "20" 
     } 
     }, 
     "analyzer": { 
     "keywords": { 
      "filter": [ 
      "lowercase", 
      "autocomplete_filter" 
      ], 
      "char_filter": [ 
      "my_char_filter" 
      ], 
      "type": "custom", 
      "tokenizer": "standard" 
     } 
     }, 
     "char_filter": { 
     "my_char_filter": { 
      "type": "mapping", 
      "mappings": [ 
      "ί => ι", 
      "Ί => Ι", 
      "ή => η", 
      "Ή => Η", 
      "ύ => υ", 
      "Ύ => Υ", 
      "ά => α", 
      "Ά => Α", 
      "έ => ε", 
      "Έ => Ε", 
      "ό => ο", 
      "Ό => Ο", 
      "ώ => ω", 
      "Ώ => Ω", 
      "ϊ => ι", 
      "ϋ => υ", 
      "ΐ => ι", 
      "ΰ => υ" 
      ] 
     } 
     } 
    }, 
    "number_of_shards": "1", 
    "number_of_replicas": "1", 
    "uuid": "TJjOt9L9QE2HrsUFHM6zJg", 
    "version": { 
     "created": "2040099" 
    } 
    } 
} 
    } 
} 

我的映射是:

{ 
    "locali": { 
"mappings": { 
    "places": { 
    "properties": { 
     "formattedCategories": { 
     "properties": { 
      "english": { 
      "type": "string" 
      }, 
      "greek": { 
      "type": "string" 
      } 
     } 
     }, 
     "keywords": { 
     "type": "string", 
     "analyzer": "keywords" 
     }, 
     "loc": { 
     "properties": { 
      "coordinates": { 
      "type": "geo_point" 
      } 
     } 
     }, 
     "location": { 
     "properties": { 
      "formattedAddress": { 
      "properties": { 
       "english": { 
       "type": "string" 
       }, 
       "greek": { 
       "type": "string" 
       } 
      } 
      }, 
      "locality": { 
      "properties": { 
       "english": { 
       "type": "string" 
       }, 
       "greek": { 
       "type": "string" 
       } 
      } 
      }, 
      "neighbourhood": { 
      "properties": { 
       "english": { 
       "type": "string" 
       }, 
       "greek": { 
       "type": "string" 
       } 
      } 
      } 
     } 
     }, 
     "name": { 
     "properties": { 
      "english": { 
      "type": "string" 
      }, 
      "greek": { 
      "type": "string" 
      } 
     } 
     }, 
     "rating": { 
     "properties": { 
      "rating": { 
      "type": "long" 
      } 
     } 
     }, 
     "seenDetails": { 
     "type": "long" 
     }, 
     "verified": { 
     "type": "long" 
     } 
    } 
    }, 
    "regions": { 
    "properties": { 
     "keywords": { 
     "type": "string", 
     "analyzer": "keywords" 
     }, 
     "loc": { 
     "properties": { 
      "coordinates": { 
      "type": "geo_point" 
      } 
     } 
     }, 
     "name": { 
     "properties": { 
      "english": { 
      "type": "string" 
      }, 
      "greek": { 
      "type": "string" 
      } 
     } 
     }, 
     "type": { 
     "type": "long" 
     }, 
     "weight": { 
     "type": "long" 
     } 
    } 
    }, 
    "categories": { 
    "properties": { 
     "keywords": { 
     "type": "string", 
     "analyzer": "keywords" 
     }, 
     "name": { 
     "properties": { 
      "english": { 
      "type": "string" 
      }, 
      "greek": { 
      "type": "string" 
      } 
     } 
     }, 
     "weight": { 
     "type": "long" 
     } 
    } 
    } 
} 
    } 
} 
+0

你能在这里粘贴你的映射和设置吗? – user3775217

回答

0

你能把你的查询也贴在这里吗?我用下面的查询试了你的例子:

{ 
    "query": {"match": { 
    "keywords": "gas station" 
    } 
    } 
} 

使用这个查询,我得到了你想要的结果:

{ 
    "took": 2, 
    "timed_out": false, 
    "_shards": { 
    "total": 5, 
    "successful": 5, 
    "failed": 0 
    }, 
    "hits": { 
    "total": 3, 
    "max_score": 0.081366636, 
    "hits": [ 
     { 
     "_index": "stack", 
     "_type": "type", 
     "_id": "AVjP6QnpdNp-z_ybGd-L", 
     "_score": 0.081366636, 
     "_source": { 
      "keywords": [ 
      "Praktoreio Ugrwn Kausimwn/Gkaraz", 
      "Praktoreio Ygrwn Kaysimwn/Gkaraz", 
      "Praktoreio Ugron Kausimon/Gkaraz", 
      "Praktoreio Ygron Kaysimon/Gkaraz", 
      "Πρακτορείο Υγρών Καυσίμων/Γκαράζ", 
      "Gas Station" 
      ] 
     } 
     }, 
     { 
     "_index": "stack", 
     "_type": "type", 
     "_id": "AVjP5-u5dNp-z_ybGd-I", 
     "_score": 0.03182549, 
     "_source": { 
      "keywords": [ 
      "Radio Station", 
      "Radio Station" 
      ] 
     } 
     }, 
     { 
     "_index": "stack", 
     "_type": "type", 
     "_id": "AVjP6KiKdNp-z_ybGd-K", 
     "_score": 0.03182549, 
     "_source": { 
      "keywords": [ 
      "TV Station", 
      "TV Station" 
      ] 
     } 
     } 
    ] 
    } 
} 

试试这个查询,看看是否能得到所需的结果。如果这对你不起作用,请把你的映射、查询和 ES 版本也贴出来。

希望这可以解决您的问题。谢谢

+0

我有一个名为“keywords”的自动完成分析器。我正在使用相同的查询,但结果如上所述。 –

+0

好的。请尝试使用 Explain API 来了解为什么使用该分析器时得分不同,同时检查该分析器生成并存入倒排索引的词项(analyzed terms)。 – user3775217

+0

我查了一下,出现这种情况是因为倒排索引中的 tf/idf 值不同,而这是由于你在映射中使用了 edge ngram 词元过滤器。所以正是 tf 和 idf 使前三个文档获得了更高的得分。 – user3775217

相关问题