2016-06-12 72 views
1

我有一个类似下面这样的文档(这里仅作示例,主题:ElasticSearch 双重嵌套排序):

{ 
"user": "xyz", 
"state": "FINISHED", 
"finishedTime": 1465566467161, 
"jobCounters": { 
    "counterGroup": [ 
     { 
      "counterGroupName": "org.apache.hadoop.mapreduce.FileSystemCounter", 
      "counter": [ 
       { 
        "name": "FILE_BYTES_READ", 
        "mapCounterValue": 206509212380, 
        "totalCounterValue": 423273933523, 
        "reduceCounterValue": 216764721143 
       }, 
       { 
        "name": "FILE_BYTES_WRITTEN", 
        "mapCounterValue": 442799895522, 
        "totalCounterValue": 659742824735, 
        "reduceCounterValue": 216942929213 
       }, 
       { 
        "name": "HDFS_BYTES_READ", 
        "mapCounterValue": 207913352565, 
        "totalCounterValue": 207913352565, 
        "reduceCounterValue": 0 
       }, 
       { 
        "name": "HDFS_BYTES_WRITTEN", 
        "mapCounterValue": 0, 
        "totalCounterValue": 89846725044, 
        "reduceCounterValue": 89846725044 
       } 
      ] 
     }, 
     { 
      "counterGroupName": "org.apache.hadoop.mapreduce.JobCounter", 
      "counter": [ 
       { 
        "name": "TOTAL_LAUNCHED_MAPS", 
        "mapCounterValue": 0, 
        "totalCounterValue": 13394, 
        "reduceCounterValue": 0 
       }, 
       { 
        "name": "TOTAL_LAUNCHED_REDUCES", 
        "mapCounterValue": 0, 
        "totalCounterValue": 720, 
        "reduceCounterValue": 0 
       } 
      ] 
     } 
    ] 
} 

}

现在我想根据 counter.name 为 FILE_BYTES_READ 的那个计数器的 totalCounterValue 对这些数据排序,取出前 15 个文档。我已经尝试过嵌套排序,但无论我在 counter.name 中写哪个键名,它总是按 HDFS_BYTES_READ 排序。有人能帮我看看我的查询吗?

{ 
"_source": true, 
"size": 15, 
"query": { 
    "bool": { 
     "must": [ 
      { 
       "term": { 
        "state": { 
         "value": "FINISHED" 
        } 
       } 
      }, 
      { 
       "range": { 
        "startedTime": { 
         "gte": "now - 4d", 
         "lte": "now" 
        } 
       } 
      } 
     ] 
    } 
}, 
"sort": [ 
    { 
     "jobCounters.counterGroup.counter.totalCounterValue": { 
      "order": "desc", 
      "nested_path": "jobCounters.counterGroup", 
      "nested_filter": { 
       "nested": { 
        "path": "jobCounters.counterGroup.counter", 
        "filter": { 
         "term": { 
          "jobCounters.counterGroup.counter.name": "file_bytes_read" 
         } 
        } 
       } 
      } 
     } 
    } 
]} 

这是我们为 jobCounters 创建的映射:

"jobCounters": { 
     "type": "nested", 
     "include_in_parent": true, 
     "properties" : { 
      "counterGroup": { 
      "type": "nested", 
      "include_in_parent": true, 
      "properties": { 
       "counterGroupName": { 
       "type": "string", 
       "fields": { 
        "raw": { 
         "type": "string", 
         "index": "not_analyzed" 
         } 
        } 
       }, 
       "counter" : { 
        "type": "nested", 
        "include_in_parent": true, 
        "properties": { 
         "reduceCounterValue": { 
          "type": "long" 
         }, 
         "name": { 
          "type": "string", 
          "analyzer": "english", 
          "fields": { 
           "raw": { 
            "type": "string", 
            "index": "not_analyzed" 
           } 
          } 
         }, 
         "totalCounterValue": { 
          "type": "long" 
         }, 
         "mapCounterValue": { 
          "type": "long" 
         }     
        }     
       }      
      } 
      } 
     } 
    } 

我参照 ElasticSearch 关于嵌套排序的文档写出了这个查询,但我不知道为什么无论 jobCounters.counterGroup.counter.name 取什么值,它总是按 HDFS_BYTES_READ 的 totalCounterValue 排序。

+0

你可以分享映射吗? –

+0

已经分享了!抱歉 :) :) – rg41

回答

0

你可以尝试这样做:

curl -XGET 'http://localhost:9200/index/jobCounters/_search' -d ' 
{ 
    "size": 15, 
    "query": { 
    "nested": { 
     "path": "jobCounters.counterGroup.counter", 
     "filter": { 
     "term": { 
      "jobCounters.counterGroup.counter.name": "file_bytes_read" 
     } 
     } 
    } 
    }, 
    "sort": [ 
    { 
     "jobCounters.counterGroup.counter.totalCounterValue": { 
     "order": "desc", 
     "nested_path": "jobCounters.counterGroup", 
     "nested_filter": { 
      "nested": { 
      "path": "jobCounters.counterGroup.counter", 
      "filter": { 
       "term": { 
       "jobCounters.counterGroup.counter.name": "file_bytes_read" 
       } 
      } 
      } 
     } 
     } 
    } 
    ] 
} 
' 

请阅读这篇文档的末尾,其中解释了我们必须在 nested_filter 中重复相同的查询。

+0

不行,它仍然按其他字段排序。此外,我认为在这个查询中,排序无法知道应该按哪一个 `totalCounterValue` 排序,所以它就按第一个进行排序。 – rg41

+0

我编辑了我的答案。我认为你必须在排序之前的查询中以及排序的嵌套过滤器(nested_filter)中都对 jobCounters.counterGroup.counter.name 进行匹配。希望这样可以工作 –

+0

不幸的是,即使这样也不起作用。现在它按 FILE_BYTES_WRITTEN 而不是 FILE_BYTES_READ 排序。也许我们需要做些额外处理,因为 counterGroup 是一个数组,counter 也是一个数组,所以属于双重嵌套? – rg41