如何在不影响聚合计数的情况下，对聚合字段使用 ElasticSearch 过滤器

我们使用 ElasticSearch 根据 5 个字段查找报价（offer），例如某些“自由文本”、报价状态和客户名称。我们还需要在客户名称和报价状态这两个字段上做聚合。所以，当有人输入一些自由文本，而我们找到比如 10 条状态为“关闭”、8 条状态为“打开”的文档时，“状态过滤器”应该显示 关闭（10）和 打开（8）。

现在的问题是：当我选择把状态“关闭”加入过滤器时，“打开”状态的聚合结果就变成了 0，而我希望它保持为 8。那么，我怎样才能防止针对聚合字段的过滤器影响聚合本身？

这是第一次查询，比如搜索“java”：

{ 
    "query": { 
     "bool": { 
      "filter": [ 
      ], 
      "must": { 
       "simple_query_string": { 
        "query" : "java" 
       } 
      } 
     } 
    }, 
    "aggs": { 
     "OFFER_STATE_F": { 
      "terms": { 
       "size": 0, 
       "field": "offer_state_f", 
       "min_doc_count": 0 
      } 
     } 
    }, 
    "from": 0, 
    "size": 1, 
    "fields": ["offer_id_ft", "offer_state_f"] 
}

结果是这样的：

{ 
    "hits": { 
    "total": 960, 
    "max_score": 0.89408284000000005, 
    "hits": [ 
     { 
     "_type": "offer", 
     "_index": "select", 
     "_id": "40542", 
     "fields": { 
      "offer_id_ft": [ 
      "40542" 
      ], 
      "offer_state_f": [ 
      "REJECTED" 
      ] 
     }, 
     "_score": 0.89408284000000005 
     } 
    ] 
    }, 
    "_shards": { 
    "total": 5, 
    "successful": 5, 
    "failed": 0 
    }, 
    "timed_out": false, 
    "aggregations": { 
    "OFFER_STATE_F": { 
     "buckets": [ 
     { 
      "key": "REJECTED", 
      "doc_count": 778 
     }, 
     { 
      "key": "ACCEPTED", 
      "doc_count": 130 
     }, 
     { 
      "key": "CANCELED", 
      "doc_count": 22 
     }, 
     { 
      "key": "WITHDRAWN", 
      "doc_count": 13 
     }, 
     { 
      "key": "LONGLIST", 
      "doc_count": 12 
     }, 
     { 
      "key": "SHORTLIST", 
      "doc_count": 5 
     }, 
     { 
      "key": "INTAKE", 
      "doc_count": 0 
     } 
     ], 
     "doc_count_error_upper_bound": 0, 
     "sum_other_doc_count": 0 
    } 
    }, 
    "took": 2 
}

正如你看到的，offer_state_f 各桶的总和等于总命中数（960）。现在，我在查询中加入一个状态，比如 'ACCEPTED'（已接受）。于是我的查询变为：

{ 
    "query": { 
     "bool": { 
      "filter": [ 
       { 
        "bool": { 
         "should": [ 
          { 
           "term": { 
            "offer_state_f": "ACCEPTED" 
           } 
          } 
         ] 
        } 
       }    
      ], 
      "must": { 
       "simple_query_string": { 
        "query" : "java" 
       } 
      } 
     } 
    }, 
    "aggs": { 
     "OFFER_STATE_F": { 
      "terms": { 
       "size": 0, 
       "field": "offer_state_f", 
       "min_doc_count": 0 
      } 
     } 
    }, 
    "from": 0, 
    "size": 1, 
    "fields": ["offer_id_ft", "offer_state_f"] 
}

我想要的是返回 130 条结果，同时 offer_state_f 各桶的总和仍然是 960，但我得到的却是这样的：

{ 
    "hits": { 
    "total": 130, 
    "max_score": 0.89408284000000005, 
    "hits": [ 
     { 
     "_type": "offer", 
     "_index": "select", 
     "_id": "16884", 
     "fields": { 
      "offer_id_ft": [ 
      "16884" 
      ], 
      "offer_state_f": [ 
      "ACCEPTED" 
      ] 
     }, 
     "_score": 0.89408284000000005 
     } 
    ] 
    }, 
    "_shards": { 
    "total": 5, 
    "successful": 5, 
    "failed": 0 
    }, 
    "timed_out": false, 
    "aggregations": { 
    "OFFER_STATE_F": { 
     "buckets": [ 
     { 
      "key": "ACCEPTED", 
      "doc_count": 130 
     }, 
     { 
      "key": "CANCELED", 
      "doc_count": 0 
     }, 
     { 
      "key": "INTAKE", 
      "doc_count": 0 
     }, 
     { 
      "key": "LONGLIST", 
      "doc_count": 0 
     }, 
     { 
      "key": "REJECTED", 
      "doc_count": 0 
     }, 
     { 
      "key": "SHORTLIST", 
      "doc_count": 0 
     }, 
     { 
      "key": "WITHDRAWN", 
      "doc_count": 0 
     } 
     ], 
     "doc_count_error_upper_bound": 0, 
     "sum_other_doc_count": 0 
    } 
    }, 
    "took": 10 
}

正如你所看到的，只有 ACCEPTED 桶有计数，其他所有桶都为 0。

来源

2016-02-23 JointEffort

好吧，我在一位同事的帮助下找到了答案。事实是，Val 是对的，给他 +1。我之前把所有查询过滤器都放进了 post_filter，这正是问题所在。我只需要把针对要聚合的字段的过滤器放到 post_filter 中，其余过滤器仍留在 query 里。因此：

{ 
    "query": { 
     "bool": { 
      "filter": [ 
      { 
       "term": { 
        "broker_f": "false" 
       } 
      } 
      ], 
      "must": { 
       "simple_query_string": { 
        "query" : "java" 
       } 
      } 
     } 
    }, 
    "aggs": { 
     "OFFER_STATE_F": { 
      "terms": { 
       "size": 0, 
       "field": "offer_state_f", 
       "min_doc_count": 0 
      } 
     } 
    }, 
    "post_filter" : { 
     "bool": { 
      "should": [ 
       { 
        "term": { 
         "offer_state_f": "SHORTLIST" 
        } 
       } 
      ] 
     } 
    }, 
    "from": 0, 
    "size": 1, 
    "fields": ["offer_id_ft", "offer_state_f"] 
}

而现在的结果是正确的：

{ 
    "hits": { 
    "total": 5, 
    "max_score": 0.76667790000000002, 
    "hits": [ 
     { 
     "_type": "offer", 
     "_index": "select", 
     "_id": "24454", 
     "fields": { 
      "offer_id_ft": [ 
      "24454" 
      ], 
      "offer_state_f": [ 
      "SHORTLIST" 
      ] 
     }, 
     "_score": 0.76667790000000002 
     } 
    ] 
    }, 
    "_shards": { 
    "total": 5, 
    "successful": 5, 
    "failed": 0 
    }, 
    "timed_out": false, 
    "aggregations": { 
    "OFFER_STATE_F": { 
     "buckets": [ 
     { 
      "key": "REJECTED", 
      "doc_count": 777 
     }, 
     { 
      "key": "ACCEPTED", 
      "doc_count": 52 
     }, 
     { 
      "key": "CANCELED", 
      "doc_count": 22 
     }, 
     { 
      "key": "LONGLIST", 
      "doc_count": 12 
     }, 
     { 
      "key": "WITHDRAWN", 
      "doc_count": 12 
     }, 
     { 
      "key": "SHORTLIST", 
      "doc_count": 5 
     }, 
     { 
      "key": "INTAKE", 
      "doc_count": 0 
     } 
     ], 
     "doc_count_error_upper_bound": 0, 
     "sum_other_doc_count": 0 
    } 
    }, 
    "took": 4 
}

来源

2016-02-25 15:08:04 JointEffort

不要忘记+1“为他”;-) – Val

您需要将过滤器移至post_filter部分，而不是query部分。

这样，过滤会在聚合计算完成之后才应用：聚合仍然基于整组数据进行计算，但返回的命中结果只包含与过滤器匹配的文档。

来源

2016-02-23 13:01:00 Val

嗨，这并不能达到预期的效果。我想先按“自由文本”进行搜索，统计结果中每个状态/客户名称的出现次数，然后把这些状态/名称用作缩小结果范围的多选过滤器。但多选过滤器内部应该按“OR”逻辑组合。有什么建议吗？ – JointEffort

对不起，我一定误解了这个问题。让我考虑一下，除非有人在此期间有一个好的解决方案。也许如果你可以分享你现在拥有的东西，那可能有助于描绘它。 – Val

聚合上的ElasticSearch过滤器在不影响聚合计数的情况下

回答

相关问题