
I have a MongoDB sharded cluster in which I store data from a monitoring system (ZABBIX etc.). Now I want to get some information out of the database, for example the average memfree over the last 2 days for one VM. I have read the aggregation tutorial and the comparison with SQL, but I don't understand how to query time-series documents (stored the way MongoDB suggests in its webinars). How do I aggregate time-series documents in MongoDB?

Example: I have a collection with many documents like this (one doc represents 1 hour):

"_id" : ObjectId("5558ab960e8956234285de14"), 
    "timestamp_hour" : ISODate("2014-10-13T23:00:00.000+02:00"), 
    "name" : "memfree", 
    "unity" : "B", 
    "values" : { 
     "0" : { 
      "0" : 2041004032.0000000000000000, 
      "1" : 2041004032.0000000000000000, 
      "2" : 2041004032.0000000000000000, 
      "3" : 2041004032.0000000000000000, 
      "4" : 2041004032.0000000000000000, 
      "5" : 2041004032.0000000000000000, 
      "6" : 2041004032.0000000000000000, 
      "7" : 2041004032.0000000000000000, 
      "8" : 2041004032.0000000000000000, 
      "9" : 2041004032.0000000000000000, 
      "10" : 2041004032.0000000000000000, 
      "11" : 2041004032.0000000000000000, 
      "12" : 2041004032.0000000000000000, 
      "13" : 2041004032.0000000000000000, 
      "14" : 2041004032.0000000000000000, 
      "15" : 2041004032.0000000000000000, 
      "16" : 2041004032.0000000000000000, 
      "17" : 2041004032.0000000000000000, 
      "18" : 2041004032.0000000000000000, 
      "19" : 2041004032.0000000000000000, 
      "20" : 2041004032.0000000000000000, 
      "21" : 2041004032.0000000000000000, 
      "22" : 2041004032.0000000000000000, 
      "23" : 2041004032.0000000000000000, 
      "24" : 2041004032.0000000000000000, 
      "25" : 2041004032.0000000000000000, 
      "26" : 2041004032.0000000000000000, 
      "27" : 2041004032.0000000000000000, 
      "28" : 2041004032.0000000000000000, 
      "29" : 2041004032.0000000000000000, 
      "30" : 2041004032.0000000000000000, 
      "31" : 2041004032.0000000000000000, 
      "32" : 2041004032.0000000000000000, 
      "33" : 2041004032.0000000000000000, 
      "34" : 2041004032.0000000000000000, 
      "35" : 2041004032.0000000000000000, 
      "36" : 2041004032.0000000000000000, 
      "37" : 2041004032.0000000000000000, 
      "38" : 2041004032.0000000000000000, 
      "39" : 2041004032.0000000000000000, 
      "40" : 2041004032.0000000000000000, 
      "41" : 2041004032.0000000000000000, 
      "42" : 2041004032.0000000000000000, 
      "43" : 2041004032.0000000000000000, 
      "44" : 2041004032.0000000000000000, 
      "45" : 2041004032.0000000000000000, 
      "46" : 2041004032.0000000000000000, 
      "47" : 2041004032.0000000000000000, 
      "48" : 2041004032.0000000000000000, 
      "49" : 2041004032.0000000000000000, 
      "50" : 2041004032.0000000000000000, 
      "51" : 2041004032.0000000000000000, 
      "52" : 2041004032.0000000000000000, 
      "53" : 2041004032.0000000000000000, 
      "54" : 2041004032.0000000000000000, 
      "55" : 2041004032.0000000000000000, 
      "56" : 2041004032.0000000000000000, 
      "57" : 2041004032.0000000000000000, 
      "58" : 2041004032.0000000000000000, 
      "59" : 2041004032.0000000000000000 
     }, 
     "1" : { 
      "0" : 2041004032.0000000000000000, 
      "1" : 2041004032.0000000000000000, 
      "2" : 2041004032.0000000000000000, 
      "3" : 2041004032.0000000000000000, 
      "4" : 2041004032.0000000000000000, 
      "5" : 2041004032.0000000000000000, 
      "6" : 2041004032.0000000000000000, 
      "7" : 2041004032.0000000000000000, 
      "8" : 2041004032.0000000000000000, 
      "9" : 2041004032.0000000000000000, 
      "10" : 2041004032.0000000000000000, 
      "11" : 2041004032.0000000000000000, 
      "12" : 2041004032.0000000000000000, 
      "13" : 2041004032.0000000000000000, 
      "14" : 2041004032.0000000000000000, 
      "15" : 2041004032.0000000000000000, 
      "16" : 2041004032.0000000000000000, 
      "17" : 2041004032.0000000000000000, 
      "18" : 2041004032.0000000000000000, 
      "19" : 2041004032.0000000000000000, 
      "20" : 2041004032.0000000000000000, 
      "21" : 2041004032.0000000000000000, 
      "22" : 2041004032.0000000000000000, 
      "23" : 2041004032.0000000000000000, 
      "24" : 2041004032.0000000000000000, 
      "25" : 2041004032.0000000000000000, 
      "26" : 2041004032.0000000000000000, 
      "27" : 2041004032.0000000000000000, 
      "28" : 2041004032.0000000000000000, 
      "29" : 2041004032.0000000000000000, 
      "30" : 2041004032.0000000000000000, 
      "31" : 2041004032.0000000000000000, 
      "32" : 2041004032.0000000000000000, 
      "33" : 2041004032.0000000000000000, 
      "34" : 2041004032.0000000000000000, 
      "35" : 2041004032.0000000000000000, 
      "36" : 2041004032.0000000000000000, 
      "37" : 2041004032.0000000000000000, 
      "38" : 2041004032.0000000000000000, 
      "39" : 2041004032.0000000000000000, 
      "40" : 2041004032.0000000000000000, 
      "41" : 2041004032.0000000000000000, 
      "42" : 2041004032.0000000000000000, 
      "43" : 2041004032.0000000000000000, 
      "44" : 2041004032.0000000000000000, 
      "45" : 2041004032.0000000000000000, 
      "46" : 2041004032.0000000000000000, 
      "47" : 2041004032.0000000000000000, 
      "48" : 2041004032.0000000000000000, 
      "49" : 2041004032.0000000000000000, 
      "50" : 2041004032.0000000000000000, 
      "51" : 2041004032.0000000000000000, 
      "52" : 2041004032.0000000000000000, 
      "53" : 2041004032.0000000000000000, 
      "54" : 2041004032.0000000000000000, 
      "55" : 2041004032.0000000000000000, 
      "56" : 2041004032.0000000000000000, 
      "57" : 2041004032.0000000000000000, 
      "58" : 2041004032.0000000000000000, 
      "59" : 2041004032.0000000000000000 
.... 
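(For context, roughly how one per-second sample lands in this minute/second layout; the collection name "metrics" and the upsert pattern here are just illustrative assumptions:)

    // Assumed writer-side pattern: one pre-allocated document per hour,
    // each sample addressed as values.<minute>.<second>.
    var ts = ISODate("2014-10-13T23:41:07.000+02:00");   // sample time (example)
    var hour = new Date(ts);
    hour.setUTCMinutes(0, 0, 0);                         // truncate to the hour
    var field = "values." + ts.getUTCMinutes() + "." + ts.getUTCSeconds();
    var update = { $set: {} };
    update.$set[field] = 2041004032;                     // the measured value
    db.metrics.update(
        { name: "memfree", unity: "B", timestamp_hour: hour },
        update,
        { upsert: true }
    );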

I want to know the average memfree from '2014-10-13T23:00:00.000' to '2014-10-15T23:00:00.000', so I need to sum up all the values from the 13th to the 15th (3600 * 24 * 2 values). I think the query would look something like this, but I don't know how to express the average over the nested values:

db.metrics.aggregate([ 
    { $match: { name: 'memfree', timestamp_hour: { $gte: ISODate("2014-10-13T23:00:00.000+02:00"), $lte: ISODate("2014-10-15T23:00:00.000+02:00") } } }, 
    { 
    $group: { 
     _id: "$name", 
     avg: { $avg: "how can I get all the values??" } 
    } 
    } 
]) 

Any suggestions?

Thanks

EDIT: the correct answer (which works for one or many metrics) is:

map = function() { 
    // emit one (metric name, {value, count}) pair per second-level sample
    for (var min in this.values) { 
        for (var sec in this.values[min]) { 
            var data = { value: {}, count: {} }; 
            data.value[this.name] = this.values[min][sec]; 
            data.count[this.name] = 1; 
            emit(this.name, data); 
        } 
    } 
} 

reduce = function(key, values) { 
    // fold the emitted pairs into a running sum and count, then derive the average
    var sum = values.reduce(function(a, b) { 
        var out = { value: {}, count: {}, avg: 0 }; 
        for (var k in b.value) { 
            var incount = a.count[k] || 0; 
            var invalue = a.value[k] || 0; 
            out.value[k] = b.value[k] + invalue; 
            out.count[k] = b.count[k] + incount; 
        } 
        // each group holds a single metric name, so k is that name here
        out.avg = out.value[k] / out.count[k]; 
        return out; 
    }); 
    return sum; 
} 


printjson(db.node0208_26608.mapReduce(map, reduce, 
       { 
       query: { name: {$in:['ioutil','memtotal','memfree']}, 
          timestamp_hour: {$gte: ISODate("2014-09-22T00:00:00.000+02:00"), 
                           $lte: ISODate("2014-09-28T23:00:00.000+02:00")} 
         }, 
       // to write the output directly to a collection instead: 
       // out: { merge: "map_reduce_out" }, 
      out: {inline:1}, 
      verbose:true 
       }) 
) 

It produces a result like this:

{ 
     "results" : [ 
       { 
         "_id" : "ioutil", 
         "value" : { 
           "value" : { 
             "ioutil" : 2495762.106280909 
           }, 
           "count" : { 
             "ioutil" : 601200 
           }, 
           "avg" : 4.15130090865088 
         } 
       }, 
       { 
         "_id" : "memfree", 
         "value" : { 
           "value" : { 
             "memfree" : 28500447903744 
           }, 
           "count" : { 
             "memfree" : 601200 
           }, 
           "avg" : 47405934.636966065 
         } 
       }, 
       { 
         "_id" : "memtotal", 
         "value" : { 
           "value" : { 
             "memtotal" : 635834327040000 
           }, 
           "count" : { 
             "memtotal" : 594000 
           }, 
           "avg" : 1070428160 
         } 
       } 
     ], 
     "counts" : { 
       "input" : NumberLong(499), 
       "emit" : NumberLong(1796400), 
       "reduce" : NumberLong(11), 
       "output" : NumberLong(3) 
     }, 
     "timeMillis" : 37956, 
     "timing" : { 
       "shardProcessing" : 37948, 
       "postProcessing" : 8 
     }, 
     "shardCounts" : { 
       "192.168.0.19:27017" : { 
         "input" : 165, 
         "emit" : 594000, 
         "reduce" : 4, 
         "output" : 1 
       }, 
       "192.168.0.20:27017" : { 
         "input" : 334, 
         "emit" : 1202400, 
         "reduce" : 7, 
         "output" : 2 
       } 
     }, 
     "postProcessCounts" : { 
       "192.168.0.21:27017" : { 
         "input" : NumberLong(3), 
         "reduce" : NumberLong(0), 
         "output" : NumberLong(3) 
       } 
     }, 
     "ok" : 1 
} 
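(Side note: on newer MongoDB releases (3.4.4+, which provide $objectToArray) the same per-metric average can also be expressed with the aggregation framework by flattening the minute/second sub-documents; a minimal, untested sketch against the same collection:)

    // Flatten values.<minute>.<second> into one document per sample, then average per metric.
    db.node0208_26608.aggregate([ 
        { $match: { name: { $in: ['ioutil','memtotal','memfree'] }, 
                    timestamp_hour: { $gte: ISODate("2014-09-22T00:00:00.000+02:00"), 
                                      $lte: ISODate("2014-09-28T23:00:00.000+02:00") } } }, 
        { $project: { name: 1, minutes: { $objectToArray: "$values" } } },   // -> [{k: "0", v: {...}}, ...] 
        { $unwind: "$minutes" }, 
        { $project: { name: 1, seconds: { $objectToArray: "$minutes.v" } } }, 
        { $unwind: "$seconds" }, 
        { $group: { _id: "$name", avg: { $avg: "$seconds.v" } } } 
    ]) 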

You might want to consider this [**answer**](http://stackoverflow.com/a/30304776/). – chridam

Answer


This would be hard to achieve with the aggregation framework, but it works well with MapReduce. Something along these lines (untested):

// collect *individual* values 
map = function() { 
    for (var min in this.values) { 
        for (var sec in this.values[min]) { 
            var data = { value: {}, count: {} }; 
            data.value[this.name] = this.values[min][sec]; 
            data.count[this.name] = 1; 
            emit(null, data); 
        } 
    } 
} 

// sum values and count 
reduce = function(key, values) { 
    var sum = values.reduce(function(a, b) { 
        var out = { value: {}, count: {} }; 
        for (var k in b.value) { 
            var incount = a.count[k] || 0; 
            var invalue = a.value[k] || 0; 
            out.value[k] = b.value[k] + invalue; 
            out.count[k] = b.count[k] + incount; 
        } 
        return out; 
    }); 
    return sum; 
} 
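(A minimal usage sketch for the map/reduce above, assuming the question's metrics collection and its two-day window; the average is simply the summed value divided by the count:)

    var res = db.metrics.mapReduce(map, reduce, { 
        query: { name: 'memfree', 
                 timestamp_hour: { $gte: ISODate("2014-10-13T23:00:00.000+02:00"), 
                                   $lte: ISODate("2014-10-15T23:00:00.000+02:00") } }, 
        out: { inline: 1 } 
    }); 

    // each inline result carries the running sum and the sample count per metric name
    res.results.forEach(function(r) { 
        for (var k in r.value.value) { 
            print(k + " avg: " + r.value.value[k] / r.value.count[k]); 
        } 
    }); 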

OK... if I want the avg for 3 metrics (memfree, cputil, diskfree), do I have to repeat the same routine 3 times with different queries, or is it possible to put an $or in the query and group the results in the final statement? Maybe with emit(this.name, this.values[min][sec]); ? – SUPERALEX


@SUPERALEX I don't have time to write out the details, but I have edited my answer –


Thanks, the first solution works; I slightly corrected the second one. The only thing I don't understand is that, if I run both solutions on the same metric, they give slightly different averages, but only when the metric does not have integer values... I think there is some rounding issue, but I don't care much – SUPERALEX