2015-05-06 133 views
1

我遇到了一些与多键索引查询中数组元素顺序有关、自己无法理解的奇怪行为。给定一个集合,其中的文档具有如下“结构”(schema):

{ 
tag : ["t:someTag", "A", "B", "C"] 
msg : "some message" 
timestamp : ISODate(...) 
someIntField: 1 
} 

tag 是一个字符串数组,第一个元素形如 “t:something”,后面跟着任意数量的字符串标签。集合的统计信息如下:

db.perf_multikey.stats() 
{ 
     "ns" : "test.perf_multikey", 
     "count" : 36239306, 
     "size" : 22124848112, 
     "avgObjSize" : 610, 
     "storageSize" : 24330923904, 
     "numExtents" : 32, 
     "nindexes" : 4, 
     "lastExtentSize" : 2146426864, 
     "paddingFactor" : 1, 
     "systemFlags" : 1, 
     "userFlags" : 1, 
     "totalIndexSize" : 17494579648, 
     "indexSizes" : { 
       "_id_" : 1177303120, 
       "tag_1" : 12851094032, 
       "timestamp_1" : 1800706768, 
       "level_1" : 1665475728 
     }, 
     "ok" : 1 
} 

我执行以下查询:

db.perf_multikey.find({tag: {$all:["t:a", "J"]}}) 

正如预期,它命中了索引并返回了几条记录:

db.perf_multikey.find({tag: {$all:["t:a", "J"]}}).explain() 
{ 
     "cursor" : "BtreeCursor tag_1", 
     "isMultiKey" : true, 
     "n" : 6, 
     "nscannedObjects" : 10, 
     "nscanned" : 10, 
     "nscannedObjectsAllPlans" : 10, 
     "nscannedAllPlans" : 22, 
     "scanAndOrder" : false, 
     "indexOnly" : false, 
     "nYields" : 1, 
     "nChunkSkips" : 0, 
     "millis" : 7, 
     "indexBounds" : { 
       "tag" : [ 
         [ 
           "t:a", 
           "t:a" 
         ] 
       ] 
     }, 
     "server" : "somefancyserver:27017", 
     "filterSet" : false 
} 

但下面这个查询,仅仅是 tag 数组中元素的顺序不同:

db.perf_multikey.find({tag: {$all:["J","t:a"]}}) 

却似乎没有使用索引:

db.perf_multikey.find({tag: {$all:["J","t:a"]}}).explain() 
{ 
     "cursor" : "Complex Plan", 
     "n" : 6, 
     "nscannedObjects" : 0, 
     "nscanned" : 7866684, 
     "nscannedObjectsAllPlans" : 7827833, 
     "nscannedAllPlans" : 15694517, 
     "nYields" : 139716, 
     "nChunkSkips" : 0, 
     "millis" : 118102, 
     "server" : "samefancyserver:27017", 
     "filterSet" : false 
} 

我使用的是 MongoDB 2.6.9。看到上述结果,我对 MongoDB 多键索引的工作原理感到困惑。为什么使用数组的查询如此依赖元素的顺序?

编辑:

升级到 MongoDB 3.0.2 后,我重新生成了数据集并重新运行了测试(数据量足够大,索引无法完全放入内存)。不幸的是,我仍然得到同样的结果(请注意,tag 字段遵循某种“模式”——数组的第一个元素是任意字符串,后面跟着若干标签的某种排列,这些标签取自一个有限的取值范围,比如 “A”–“J”)。

这些是我的结果:

快如闪电的查询(下面第一个 explain 输出):

较慢的查询(下面第二个 explain 输出):
> db.perf_multikey.find({tag : {$all : ["a", "J"]}}).explain() 
{ 
     "queryPlanner" : { 
       "plannerVersion" : 1, 
       "namespace" : "test.perf_multikey", 
       "indexFilterSet" : false, 
       "parsedQuery" : { 
         "$and" : [ 
           { 
             "tag" : { 
               "$eq" : "a" 
             } 
           }, 
           { 
             "tag" : { 
               "$eq" : "J" 
             } 
           } 
         ] 
       }, 
       "winningPlan" : { 
         "stage" : "KEEP_MUTATIONS", 
         "inputStage" : { 
           "stage" : "FETCH", 
           "filter" : { 
             "tag" : { 
               "$eq" : "J" 
             } 
           }, 
           "inputStage" : { 
             "stage" : "IXSCAN", 
             "keyPattern" : { 
               "tag" : 1 
             }, 
             "indexName" : "tag_1", 
             "isMultiKey" : true, 
             "direction" : "forward", 
             "indexBounds" : { 
               "tag" : [ 
                 "[\"a\", \"a\"]" 
               ] 
             } 
           } 
         } 
       }, 
       "rejectedPlans" : [ 
         { 
           "stage" : "FETCH", 
           "inputStage" : { 
             "stage" : "KEEP_MUTATIONS", 
             "inputStage" : { 
               "stage" : "AND_SORTED", 
               "inputStages" : [ 
                 { 
                   "stage" : "IXSCAN", 
                   "keyPattern" : { 
                     "tag" : 1 
                   }, 
                   "indexName" : "tag_1", 
                   "isMultiKey" : true, 
                   "direction" : "forward", 
                   "indexBounds" : { 
                     "tag" : [ 
                       "[\"a\", \"a\"]" 
                     ] 
                   } 
                 }, 
                 { 
                   "stage" : "IXSCAN", 
                   "keyPattern" : { 
                     "tag" : 1 
                   }, 
                   "indexName" : "tag_1", 
                   "isMultiKey" : true, 
                   "direction" : "forward", 
                   "indexBounds" : { 
                     "tag" : [ 
                       "[\"J\", \"J\"]" 
                     ] 
                   } 
                 } 
               ] 
             } 
           } 
         } 
       ] 
     }, 
     "serverInfo" : { 
       "host" : "fancyhost", 
       "port" : 27017, 
       "version" : "3.0.2", 
       "gitVersion" : "6201872043ecbbc0a4cc169b5482dcf385fc464f" 
     }, 
     "ok" : 1 
} 

> db.perf_multikey.find({tag : {$all : ["J", "a"]}}).explain() 
{ 
     "queryPlanner" : { 
       "plannerVersion" : 1, 
       "namespace" : "test.perf_multikey", 
       "indexFilterSet" : false, 
       "parsedQuery" : { 
         "$and" : [ 
           { 
             "tag" : { 
               "$eq" : "J" 
             } 
           }, 
           { 
             "tag" : { 
               "$eq" : "a" 
             } 
           } 
         ] 
       }, 
       "winningPlan" : { 
         "stage" : "FETCH", 
         "inputStage" : { 
           "stage" : "KEEP_MUTATIONS", 
           "inputStage" : { 
             "stage" : "AND_SORTED", 
             "inputStages" : [ 
               { 
                 "stage" : "IXSCAN", 
                 "keyPattern" : { 
                   "tag" : 1 
                 }, 
                 "indexName" : "tag_1", 
                 "isMultiKey" : true, 
                 "direction" : "forward", 
                 "indexBounds" : { 
                   "tag" : [ 
                     "[\"J\", \"J\"]" 
                   ] 
                 } 
               }, 
               { 
                 "stage" : "IXSCAN", 
                 "keyPattern" : { 
                   "tag" : 1 
                 }, 
                 "indexName" : "tag_1", 
                 "isMultiKey" : true, 
                 "direction" : "forward", 
                 "indexBounds" : { 
                   "tag" : [ 
                     "[\"a\", \"a\"]" 
                   ] 
                 } 
               } 
             ] 
           } 
         } 
       }, 
       "rejectedPlans" : [ 
         { 
           "stage" : "KEEP_MUTATIONS", 
           "inputStage" : { 
             "stage" : "FETCH", 
             "filter" : { 
               "tag" : { 
                 "$eq" : "a" 
               } 
             }, 
             "inputStage" : { 
               "stage" : "IXSCAN", 
               "keyPattern" : { 
                 "tag" : 1 
               }, 
               "indexName" : "tag_1", 
               "isMultiKey" : true, 
               "direction" : "forward", 
               "indexBounds" : { 
                 "tag" : [ 
                   "[\"J\", \"J\"]" 
                 ] 
               } 
             } 
           } 
         } 
       ] 
     }, 
     "serverInfo" : { 
       "host" : "fancyhost", 
       "port" : 27017, 
       "version" : "3.0.2", 
       "gitVersion" : "6201872043ecbbc0a4cc169b5482dcf385fc464f" 
     }, 
     "ok" : 1 
} 

我原以为 http://docs.mongodb.org/manual/reference/operator/query/all/#performance 可能就是答案。

毕竟,按 [“随机字符串”, “A”] 查询时,会先用“随机字符串”把可能的结果集缩小到非常小的规模,因此很容易扫描(或者做进一步处理?)。另一方面,按 [“A”, “随机字符串”] 查询应该会很慢,因为 “A” 会返回一个巨大的集合以供进一步扫描……但是按 [“A”, “随机的不存在的字符串”] 查询却快如闪电……这让我感到困惑。

+0

它本不应该这样:http://docs.mongodb.org/manual/reference/operator/query/all/#use-all-to-match-values 嗯,需要进一步调查 – Sammaye

回答

0

我强烈建议升级。我在 2.6.1 和 3.0.0 上都测试过,均没有出现这种行为。

例如,这是在 2.6.1 上的结果:

> db.t.find({tags:{$all:['t', 'tags']}}).explain() 
{ 
     "cursor" : "BtreeCursor tags_1", 
     "isMultiKey" : true, 
     "n" : 1, 
     "nscannedObjects" : 1, 
     "nscanned" : 1, 
     "nscannedObjectsAllPlans" : 1, 
     "nscannedAllPlans" : 3, 
     "scanAndOrder" : false, 
     "indexOnly" : false, 
     "nYields" : 0, 
     "nChunkSkips" : 0, 
     "millis" : 7, 
     "indexBounds" : { 
       "tags" : [ 
         [ 
           "t", 
           "t" 
         ] 
       ] 
     }, 
     "server" : "ubuntu:27017", 
     "filterSet" : false 
} 
> db.t.find({tags:{$all:['t', 'tags']}}) 
{ "_id" : ObjectId("5549f186450548aed9ad4273"), "tags" : [ "tags", "t" ] } 

即使把一个不存在的值放在最前面也是如此:

> db.t.find({tags:{$all:['f', 'tags']}}).explain() 
{ 
     "cursor" : "BtreeCursor tags_1", 
     "isMultiKey" : true, 
     "n" : 0, 
     "nscannedObjects" : 0, 
     "nscanned" : 0, 
     "nscannedObjectsAllPlans" : 0, 
     "nscannedAllPlans" : 0, 
     "scanAndOrder" : false, 
     "indexOnly" : false, 
     "nYields" : 0, 
     "nChunkSkips" : 0, 
     "millis" : 0, 
     "indexBounds" : { 
       "tags" : [ 
         [ 
           "f", 
           "f" 
         ] 
       ] 
     }, 
     "server" : "ubuntu:27017", 
     "filterSet" : false 
} 

而在3.0.0:

> db.t.find({tags:{$all:['g','t']}}).explain() 
{ 
     "queryPlanner" : { 
       "plannerVersion" : 1, 
       "namespace" : "test.t", 
       "indexFilterSet" : false, 
       "parsedQuery" : { 
         "$and" : [ 
           { 
             "tags" : { 
               "$eq" : "g" 
             } 
           }, 
           { 
             "tags" : { 
               "$eq" : "t" 
             } 
           } 
         ] 
       }, 
       "winningPlan" : { 
         "stage" : "KEEP_MUTATIONS", 
         "inputStage" : { 
           "stage" : "FETCH", 
           "filter" : { 
             "tags" : { 
               "$eq" : "t" 
             } 
           }, 
           "inputStage" : { 
             "stage" : "IXSCAN", 
             "keyPattern" : { 
               "tags" : 1 
             }, 
             "indexName" : "tags_1", 
             "isMultiKey" : true, 
             "direction" : "forward", 
             "indexBounds" : { 
               "tags" : [ 
                 "[\"g\", \"g\"]" 
               ] 
             } 
           } 
         } 
       }, 
       "rejectedPlans" : [ 
         { 
           "stage" : "FETCH", 
           "inputStage" : { 
             "stage" : "KEEP_MUTATIONS", 
             "inputStage" : { 
               "stage" : "AND_SORTED", 
               "inputStages" : [ 
                 { 
                   "stage" : "IXSCAN", 
                   "keyPattern" : { 
                     "tags" : 1 
                   }, 
                   "indexName" : "tags_1", 
                   "isMultiKey" : true, 
                   "direction" : "forward", 
                   "indexBounds" : { 
                     "tags" : [ 
                       "[\"g\", \"g\"]" 
                     ] 
                   } 
                 }, 
                 { 
                   "stage" : "IXSCAN", 
                   "keyPattern" : { 
                     "tags" : 1 
                   }, 
                   "indexName" : "tags_1", 
                   "isMultiKey" : true, 
                   "direction" : "forward", 
                   "indexBounds" : { 
                     "tags" : [ 
                       "[\"t\", \"t\"]" 
                     ] 
                   } 
                 } 
               ] 
             } 
           } 
         } 
       ] 
     }, 
     "serverInfo" : { 
       "host" : "ip-172-30-0-35", 
       "port" : 27017, 
       "version" : "3.0.0", 
       "gitVersion" : "a841fd6394365954886924a35076691b4d149168" 
     }, 
     "ok" : 1 
} 

虽然我没有在 2.6.9 上测试,但我已经在更低和更高的版本上都测试过,均无法复现此行为。

+0

谢谢你的回答,我已将mongo升级到3.0.2版本,请参阅我的编辑以获取新的测试结果。 – lakier