2017-08-22 88 views
0

我使用Mongo v2.2.0。

我写了查询,但主要问题是$ arrayElemAt。使用$ unwind- $ first进行标准替换对我来说不起作用,我认为存在更好的解决方案。我有限制运行此聚合管道作为一个单一的操作,而不是运行查询积极负数数据和后来合并结果的代码。我需要对结果查询应用$ sort,$ limit和$ skip,以限制计数words用于过滤来自其他集合的记录,并在Java代码中合并来自两个集合的数据。

聚集查询:

[ 
    { 
    $match: { 
     "merchantId": ObjectId("59520e6ccc7a701fbed31f94"), 
     "date": { 
     "$gte": NumberLong(1389644800000), 
     "$lt": NumberLong(1502409599999) 
     }, 
     "isbn": "a123", 

    } 
    }, 
    { 
    $project: { 
     "word": 1, 
     "sentence": 1, 
     "type": 1, 
     "date": 1 
    } 
    }, 
    { 
    $sort: { 
     "date": -1 
    } 
    }, 
    { 
    $group: { 
     "_id": { 
     "word": "$word", 
     "type": "$type" 
     }, 
     "date": { 
      $max: "$date" 
     }, 
     "sentence": { 
     $first: "$sentence" 
     }, 
     "sentenceCount": { 
     "$sum": 1 
     } 
    },  
    }, 
    { 
    $group: { 
      "_id": "$_id.word", 
      "word": { $first: "$_id.word"}, 
      "positiveCount": {$sum: {$cond: [{$eq: ["$_id.type", "positive"]}, "$sentenceCount", 0]}}, 
      "count": {$sum: "$sentenceCount"}, 
      "positiveSentence": { 
       "$push": { 
        "$cond": [{$eq: ["$_id.type", "positive"]}, "$sentence", "$noval"] 
       } 
      }, 
      "negativeSentence": { 
       "$push": { 
        "$cond": [{$eq: ["$_id.type", "negative"]}, "$sentence", "$noval"] 
       } 
      } 
    } 
    }, 
    { 
    $project: { 
      "_id": 0, 
      "word": 1, 
      "sentimentPercentage": {$cond: [{$eq: ["$count", 0]}, 0, {$multiply: [{$divide: ["$positiveCount", "$count"]}, 100]}]}, 
      "positiveSentence": {$arrayElemAt: ["$positiveSentence", 0]}, 
      "negativeSentence": {$arrayElemAt: ["$negativeSentence", 0]}, 
    } 
    }, 
    { 
    $sort: { 
      sentimentPercentage: -1 
    } 
    }, 
    { 
    $limit: 50 
    } 
] 

收集文件 “纲目”:

{ 
    "_id" : ObjectId("59887424e4b099e00724aa44"), 
    "merchantId" : ObjectId("59520e6ccc7a701fbed31f94"), 
    "isbn" : "a123", 
    "sentence" : "Great, friendly service.", 
    "word" : "service", 
    "type" : "positive", 
    "date" : NumberLong(1466809200000),  
} 

预期输出:

{ 
    "word" : "expectations", 
    "sentimentPercentage" : 100.0, 
    "positiveSentence" : "The service exceeded our expectations." 
}, 
{ 
    "word" : "representative", 
    "sentimentPercentage" : 87.5, 
    "positiveSentence" : "Excellent local representative, met the flight and gave us all the relevant information to ensure a great holiday.", 
    "negativeSentence" : "The representative at resort was poor." 
}, 
{ 
    "word" : "seats", 
    "sentimentPercentage" : 0.0, 
    "negativeSentence" : "Long delay and pre booked seats were lost ." 
} 

请,你能告诉我如何替换$ arrayElemAt运营商或甚至更好的如何使用Mongo的功能来优化这个查询到所需的输出= 2.2.0?

+0

你能否提供一些样本数据和你想要的输出? – dnickless

+0

@无需我提供了预期输出的示例 – Ray

回答

1

这似乎给我合理的结果。我认为这将无法正常工作,不过,在你有没有积极的或者是因为它不支持V2.2的preserveNullAndEmptyArrays参数$unwind阶段没有否定句的情况下...

db.getCollection('test').aggregate([ 
    { 
    $project: { 
     "word": 1, 
     "sentence": 1, 
     "type": 1, 
     "date": 1 
    } 
    }, 
    { 
    $sort: { 
     "date": -1 
    } 
    }, 
    { 
    $group: { 
     "_id": { 
     "word": "$word", 
     "type": "$type" 
     }, 
     "date": { 
      $max: "$date" 
     }, 
     "sentence": { 
     $first: "$sentence" 
     }, 
     "sentenceCount": { 
     "$sum": 1 
     } 
    },  
    }, 
    { 
    $group: { 
      "_id": "$_id.word", 
      "word": { $first: "$_id.word"}, 
      "positiveCount": {$sum: {$cond: [{$eq: ["$_id.type", "positive"]}, "$sentenceCount", 0]}}, 
      "count": {$sum: "$sentenceCount"}, 
      "positiveSentence": { 
       "$push": { 
        "$cond": [{$eq: ["$_id.type", "positive"]}, "$sentence", "$noval"] 
       } 
      }, 
      "negativeSentence": { 
       "$push": { 
        "$cond": [{$eq: ["$_id.type", "negative"]}, "$sentence", "$noval"] 
       } 
      } 
    } 
    }, 
    { $unwind: "$positiveSentence" }, 
    { $group: 
     { 
      "_id": "$_id", 
      "word": { $first: "$word" }, 
      "count": { $first: "$count" }, 
      "positiveCount": { $first: "$positiveCount" }, 
      "positiveSentence": { $first: "$positiveSentence" }, 
      "negativeSentence": { $first: "$negativeSentence" }, 
     } 
    }, 
    { $unwind: "$negativeSentence" }, 
    { $group: 
     { 
      "_id": "$_id", 
      "word": { $first: "$word" }, 
      "count": { $first: "$count" }, 
      "positiveCount": { $first: "$positiveCount" }, 
      "positiveSentence": { $first: "$positiveSentence" }, 
      "negativeSentence": { $first: "$negativeSentence" }, 
     } 
    }, 
    { 
    $project: { 
      "_id": 0, 
      "word": 1, 
      "sentimentPercentage": {$cond: [{$eq: ["$count", 0]}, 0, {$multiply: [{$divide: ["$positiveCount", "$count"]}, 100]}]}, 
      "positiveSentence": 1, 
      "negativeSentence": 1 
    } 
    } 
]) 

你可能会能够进一步简化,例如摆脱第一个投影和分组阶段。如果你愿意,我可以在几个小时内查看。

+0

幸运的是,我发现我们将db版本升级到3.4.4+。但我仍然对如何解决这个难题感兴趣。不要着急,它会等待 – Ray