2012-10-09 75 views
1

我在MongoDB中有如下集合(将SQL查询翻译为MongoDB的MapReduce):

{ 
    "_id" : ObjectId("506217890b50f300d020d237"), 
    "o_orderkey" : NumberLong(1), 
    "o_orderstatus" : "O", 
    "o_totalprice" : 173665.47, 
    "o_orderdate" : ISODate("1996-01-02T02:00:00Z"), 
    "o_orderpriority" : "5-LOW", 
    "o_clerk" : "Clerk#000000951", 
    "o_shippriority" : 0, 
    "o_comment" : "blithely final dolphins solve-- blithely blithe packages nag blith", 
    "customer" : { 
      "c_custkey" : NumberLong(36901), 
      "c_name" : "Customer#000036901", 
      "c_address" : "TBb1yDZcf 8Zepk7apFJ", 
      "c_phone" : "23-644-998-4944", 
      "c_acctbal" : 4809.84, 
      "c_mktsegment" : "AUTOMOBILE", 
      "c_comment" : "regular accounts after the blithely pending dependencies play blith", 
      "c_nationkey" : { 
        "n_nationkey" : NumberLong(13), 
        "n_name" : "JORDAN", 
        "n_comment" : "blithe, express deposits boost carefully busy accounts. furiously pending depos", 
        "n_regioin" : { 
          "r_regionkey" : NumberLong(4), 
          "r_name" : "MIDDLE EAST", 
          "r_comment" : "furiously unusual packages use carefully above the unusual, exp" 
        } 
      } 
    }, 
    "o_lineitem" : [ 
     { 
      "l_linenumber" : 1, 
      "l_quantity" : 17, 
      "l_extendedprice" : 21168.23, 
      "l_discount" : 0.04, 
      "l_tax" : 0.02, 
      "l_returnflag" : "N", 
      "l_linestatus" : "O", 
      "l_shipdate" : ISODate("1996-03-13T03:00:00Z"), 
      "l_commitdate" : ISODate("1996-02-12T03:00:00Z"), 
      "l_receiptdate" : ISODate("1996-03-22T03:00:00Z"), 
      "l_shipinstruct" : "DELIVER IN PERSON", 
      "l_shipmode" : "TRUCK", 
      "l_comment" : "blithely regular ideas caj", 
      "partsupp" : { 
       "ps_availqty" : 6157, 
       "ps_supplycost" : 719.17, 
       "ps_comment" : "blithely ironic packages haggle quickly silent platelets. silent packages must have to nod. slyly special theodolites along the blithely ironic packages nag above the furiously pending acc", 
       "ps_partkey" : { 
        "p_partkey" : NumberLong(155190), 
        "p_name" : "slate lavender tan lime lawn", 
        "p_mfgr" : "Manufacturer#4", 
        "p_brand" : "Brand#44", 
        "p_type" : "PROMO BRUSHED NICKEL", 
        "p_size" : 9, 
        "p_container" : "JUMBO JAR", 
        "p_retailprice" : 1245.19, 
        "p_comment" : "regular, final dol" 
       }, 
       "ps_suppkey" : { 
        "s_suppkey" : NumberLong(7706), 
        "s_name" : "Supplier#000007706", 
        "s_address" : "BlHq75VoMNCoU380SGiS9fTWbGpeI", 
        "s_phone" : "33-481-218-6643", 
        "s_acctbal" : -379.71, 
        "s_comment" : "carefully pending ideas after the instructions are alongside of the dolphins. slyly pe", 
        "s_nationkey" : { 
         "n_nationkey" : NumberLong(23), 
         "n_name" : "UNITED KINGDOM", 
         "n_comment" : "fluffily regular pinto beans breach according to the ironic dolph", 
         "n_regioin" : { 
          "r_regionkey" : NumberLong(3), 
          "r_name" : "EUROPE", 
          "r_comment" : "special, bold deposits haggle foxes. platelet" 
         } 
        } 
       } 
      } 
     }, 
     . 
     . 
     . 
    ] 
} 

而且我尝试翻译下面的SQL查询:

-- Lists supplier and part details for parts of size 15 whose type ends in
-- 'BRASS', supplied by suppliers whose nation belongs to region 'EUROPE'.
-- Only part/supplier pairs whose supply cost equals the minimum supply cost
-- for that part among 'EUROPE' suppliers are kept (see the subquery below).
select 
    s_acctbal, 
    s_name, 
    n_name, 
    p_partkey, 
    p_mfgr, 
    s_address, 
    s_phone, 
    s_comment 
from 
    part, 
    supplier, 
    partsupp, 
    nation, 
    region 
where 
    -- Join conditions and row filters for the outer query.
    p_partkey = ps_partkey 
    and s_suppkey = ps_suppkey 
    and p_size = 15 
    and p_type like '%BRASS' 
    and s_nationkey = n_nationkey 
    and n_regionkey = r_regionkey 
    and r_name = 'EUROPE' 
    -- Correlated subquery: `part` is not in its FROM list, so p_partkey
    -- refers to the part of the current outer row.
    and ps_supplycost = (
     select 
      min(ps_supplycost) 
     from 
      partsupp, supplier, 
      nation, region 
     where 
      p_partkey = ps_partkey 
      and s_suppkey = ps_suppkey 
      and s_nationkey = n_nationkey 
      and n_regionkey = r_regionkey 
      and r_name = 'EUROPE' 
    ) 
-- Highest account balance first; ties broken by nation, supplier, part key.
order by 
    s_acctbal desc, 
    n_name, 
    s_name, 
    p_partkey; 

我尝试编写的函数如下:

// Runs a mapReduce command over the "ordersfull" collection and writes the
// result to the "query002" collection. The map function emits one value per
// line item whose part has size 15, whose part type ends in "BRASS" and whose
// supplier's region name is "EUROPE".
db.runCommand({ 
    mapreduce: "ordersfull", 
    // Empty query: every document in the collection is passed to map().
    query: { 
    }, 
    map: function Map() { 
     var pattern = /BRASS$/g; 

     // NOTE(review): for..in walks the property keys of o_lineitem rather
     // than its elements, hence the repeated this.o_lineitem[i] lookups below.
     for(var i in this.o_lineitem){ 
      var p_size = this.o_lineitem[i].partsupp.ps_partkey.p_size; 
      var p_type = this.o_lineitem[i].partsupp.ps_partkey.p_type; 
      var region = this.o_lineitem[i].partsupp.ps_suppkey.s_nationkey.n_regioin.r_name; 

      if(p_size==15 && p_type.match(pattern)!=null && region == "EUROPE"){ 
       // NOTE(review): every match is emitted under the same key (""), so
       // all emitted values end up in one group handed to reduce().
       emit("",{ 
        s_acctbal: this.o_lineitem[i].partsupp.ps_suppkey.s_acctbal, 
        s_name: this.o_lineitem[i].partsupp.ps_suppkey.s_name, 
        n_name: this.o_lineitem[i].partsupp.ps_suppkey.s_nationkey.n_name, 
        p_partkey: this.o_lineitem[i].partsupp.ps_partkey.p_partkey, 
        p_mfgr: this.o_lineitem[i].partsupp.ps_partkey.p_mfgr, 
        s_address: this.o_lineitem[i].partsupp.ps_suppkey.s_address, 
        s_phone: this.o_lineitem[i].partsupp.ps_suppkey.s_phone, 
        s_comment: this.o_lineitem[i].partsupp.ps_suppkey.s_comment 
       }); 
      } 

     } 
    }, 
    // NOTE(review): the body is empty, so reduce() returns undefined for the
    // single "" group; this is presumably why the output holds only null values.
    reduce: function(key, values) { 
    }, 
    out: 'query002' 
}); 

在结果中,我得到的所有条目都是空值,这是怎么回事?

+0

我会尝试逐步构建查询(即先让一些基本的部分正常工作,然后一小步一小步地增加复杂度)。我注意到你在map()中emit的是空白键,而且reduce()函数没有任何实现,所以这可能是问题的一部分。根据结果集的大小,您可能还想考虑使用MongoDB 2.2中新的[Aggregation Framework](http://docs.mongodb.org/manual/applications/aggregation/)。 – Stennie

+0

需要汇总多少个文档?你使用的是什么版本的MongoDB? – Stennie

+0

嗨Stennie,抱歉,我之前在度假。谢谢你的宝贵提示。我使用的是MongoDB 2.0.6版本,我的集合中大约有600万个文档。 – ulima69

回答

0

您可以在JavaScript函数中使用print()和printjson()语句来调试MapReduce。打印的输出将保存在MongoDB日志中。

有几个问题与您的MapReduce:

  • for .. in循环不会像你预期的那样工作(will not work as you expect)……你应该改用array.forEach(..)
  • 如果你在遍历数组,你已经拥有对数组元素的引用,不应再使用array[index]
  • 如果你不希望发生意外的分组,emit()时应该使用唯一的键
  • reduce()应该返回一个与emit出的数据结构相匹配的值
  • 您应该使用query参数来限制需要检查的文档数量

鉴于你似乎只是在遍历文档,而没有做任何分组或reduce()操作,您可能会发现在应用程序代码中获取文档并执行相同的匹配更容易。

无论如何,map()函数实际上应该更像这样:

// Map function for a mapReduce over order documents (invoked with `this`
// bound to one order document).
//
// For every line item whose part has p_size 15, whose p_type ends in "BRASS"
// and whose supplier's region name is "EUROPE", emits the supplier/part
// details keyed by the (supplier key, part key) pair.
//
// The composite key keeps distinct supplier/part rows apart; keying on the
// supplier name alone would hand every part of one supplier to a single
// reduce() call. Line items missing the nested part or supplier data are
// skipped instead of throwing.
var map = function() { 
    // No `g` flag: a global regex keeps lastIndex state between test() calls.
    var pattern = /BRASS$/; 

    // Orders without line items simply emit nothing.
    (this.o_lineitem || []).forEach(function(item) { 
     var partsupp = item.partsupp || {}; 
     var partKey = partsupp.ps_partkey; 
     var suppKey = partsupp.ps_suppkey; 

     if (!partKey || !suppKey) { 
      return; 
     } 

     var nation = suppKey.s_nationkey || {}; 
     // "n_regioin" (sic) is the field name used by the stored documents.
     var region = (nation.n_regioin || {}).r_name; 

     if (partKey.p_size == 15 && pattern.test(partKey.p_type) && region == "EUROPE") { 
      emit( 
       { s_suppkey: suppKey.s_suppkey, p_partkey: partKey.p_partkey }, 
       { 
        s_acctbal: suppKey.s_acctbal, 
        s_name: suppKey.s_name, 
        n_name: nation.n_name, 
        p_partkey: partKey.p_partkey, 
        p_mfgr: partKey.p_mfgr, 
        s_address: suppKey.s_address, 
        s_phone: suppKey.s_phone, 
        s_comment: suppKey.s_comment 
       } 
      ); 
     } 
    }); 
}; 

考虑到你的数据结构,以及查询所需的多重匹配和排序,使用MongoDB 2.2中新的Aggregation Framework来翻译这个查询会更容易。

有一些current limitations需要注意(例如Aggregation管道输出的当前最大16MB),但您可能会发现查询更易于创建和调试。

下面是一个使用聚合框架的带注释的例子,其中的初始匹配条件包括订单状态、日期以及感兴趣的零件/供应商行项目:

db.ordersfull.aggregate(
    // Pipeline overview: pre-filter orders, unwind their line items, keep the
    // line items for size-15 parts whose type ends in "BRASS" supplied from
    // region "EUROPE", project the supplier/part fields and sort them.

    // Find matching documents first (can take advantage of index).
    // The order status and order date criteria are illustrative extras that
    // are not part of the SQL query being translated; adjust or remove them.
    // A single $elemMatch requires ONE line item to satisfy all conditions;
    // separate $elemMatch clauses could each be met by a different line item
    // and would let through orders that have no qualifying line item.
    { $match: { 
     o_orderstatus: 'O', 
     o_orderdate: { $gte: new ISODate('2012-10-01') }, 
     o_lineitem: { $elemMatch: { 
      'partsupp.ps_partkey.p_size': 15, 
      'partsupp.ps_partkey.p_type': /BRASS$/, 
      'partsupp.ps_suppkey.s_nationkey.n_regioin.r_name': 'EUROPE' 
     }} 
    }}, 

    // Filter to fields of interest 
    { $project: { 
     _id: 0, 
     o_lineitem: 1 
    }}, 

    // Convert line item arrays into document stream 
    { $unwind: '$o_lineitem' }, 

    // Match desired line items (the first $match only selected whole orders,
    // so the other line items of those orders must still be dropped here)
    { $match: { 
     'o_lineitem.partsupp.ps_partkey.p_size': 15, 
     'o_lineitem.partsupp.ps_partkey.p_type': /BRASS$/, 
     'o_lineitem.partsupp.ps_suppkey.s_nationkey.n_regioin.r_name': 'EUROPE' 
    }}, 

    // Final field selection 
    { $project: { 
     s_acctbal: '$o_lineitem.partsupp.ps_suppkey.s_acctbal', 
     s_name: '$o_lineitem.partsupp.ps_suppkey.s_name', 
     n_name: '$o_lineitem.partsupp.ps_suppkey.s_nationkey.n_name', 
     p_partkey: '$o_lineitem.partsupp.ps_partkey.p_partkey', 
     p_mfgr: '$o_lineitem.partsupp.ps_partkey.p_mfgr', 
     s_address: '$o_lineitem.partsupp.ps_suppkey.s_address', 
     s_phone: '$o_lineitem.partsupp.ps_suppkey.s_phone', 
     s_comment: '$o_lineitem.partsupp.ps_suppkey.s_comment' 
    }}, 

    // Sort the output: account balance descending, then nation, supplier
    // name and part key ascending
    { $sort: { 
     s_acctbal: -1, 
     n_name: 1, 
     s_name: 1, 
     p_partkey: 1 
    }} 
) 
+0

非常感谢你帮助了我很多。 – ulima69