2013-08-06 90 views
1

我想用 pymongo 对 MongoDB 运行查询,并把每条结果分别写成单独的 XML 文件。当我只使用排序而不添加任何查找条件时,查询可以正常工作。我想要做的是能够插入各种查找条件,并让脚本根据这些条件写出 XML。脚本中下面这一行可以正常工作(使用 find 和 sort 的 MongoDB 查询):

def find_id_sort(): 
    for post in db.video_md_fcsvr.find({},).sort("_id",): 
但是,如果我尝试添加一个查找条件,像这样:

;

def find_id_sort(): 
    for post in db.video_md_fcsvr.find({}, {"streams.codec_name": "prores"}).sort("_id",): 

结果是这样的:

KeyError: 'format',位于 python_write_xml_toFile.py 的 find_id_sort 函数,第 43 行:format_data = post['format']

完整脚本如下:

import sys 
import os 
import xml.etree.cElementTree as ET 
import pymongo 
from pymongo import MongoClient 
from bson import Binary, Code 
from bson.json_util import dumps 
import io, json 
from itertools import groupby 
from bson.objectid import ObjectId 
import datetime 


# Client for the MongoDB server on localhost, port 27017.
# NOTE(review): slave_okay presumably permits reads from secondaries -- confirm
# against the installed pymongo version.
connection = MongoClient(host="localhost:27017", slave_okay=True)

# Database that holds the video_md_fcsvr collection queried below.
db = connection["video_ingest_db"]


def _add_field(parent, tag, text, attributes):
    """Append a child element named *tag* to *parent* and return it.

    *attributes* is a sequence of ``(name, value)`` pairs; each value is
    converted with ``str()`` before being set, in the order given.
    """
    field = ET.SubElement(parent, tag)
    for name, value in attributes:
        field.set(name, str(value))
    field.text = text
    return field


def find_id_sort(criteria=None):
    """Write one XML metadata file per matching document, ordered by ``_id``.

    Queries the ``video_md_fcsvr`` collection of the module-level ``db``,
    prints a short summary of each document and writes
    ``/Users/mathiesj/Desktop/metadata/<ObjectId>.xml`` for it.

    Args:
        criteria: Query filter passed as the first argument of ``find()``.
            Defaults to ``{"streams.codec_name": "prores"}``. Pass ``{}``
            to export every document.

    Raises:
        KeyError: If a document lacks ``format``, ``streams`` or one of the
            sub-fields read below.
        IndexError: If a document has fewer than two entries in ``streams``.
    """
    if criteria is None:
        criteria = {"streams.codec_name": "prores"}

    # The filter has to be the FIRST argument of find(). Passing it second
    # makes it a projection, so the returned documents no longer contain
    # 'format' and the lookups below fail with KeyError: 'format'.
    for post in db.video_md_fcsvr.find(criteria).sort("_id"):
        # GRAB VALUES FROM FIELDS ------------------------------------------
        video_id = post['_id']
        video_id_timestamp = video_id.generation_time  # timestamp off the ObjectId
        format_data = post['format']
        format_name = format_data['format_name']
        stream_0 = post['streams'][0]
        stream_1 = post['streams'][1]

        print("format name: %s " % format_name)
        print("format data: %s" % format_data)
        print("Video ID: %s" % video_id)
        print("Creation Time: %s" % video_id_timestamp)

        # Path and naming convention of the metadata files: one per ObjectId.
        id_to_string = str(video_id)
        metadata_file_name = "/Users/mathiesj/Desktop/metadata/" + id_to_string + ".xml"

        # WRITE VALUES TO XML FORMAT ---------------------------------------
        root = ET.Element("video_metadata")
        metadata = ET.SubElement(root, "metadata")

        _add_field(metadata, "mongodb_id", "some value1", [
            ("id", id_to_string),
        ])
        _add_field(metadata, "creation_time", "some value2", [
            ("time_stamp", video_id_timestamp),
        ])
        _add_field(metadata, "path_filename", "some value3", [
            ("path_filename", format_data['filename']),
        ])
        _add_field(metadata, "video_format", "some value4", [
            ("video_format_name", format_name),
            ("video_format_long_name", format_data['format_long_name']),
        ])
        _add_field(metadata, "stream_0", "some value5", [
            ("codec_type", stream_0['codec_type']),
            ("codec_name", stream_0['codec_name']),
            ("frame_rate", stream_0['avg_frame_rate']),
            ("video_height", stream_0['height']),
            ("video_width", stream_0['width']),
        ])
        _add_field(metadata, "stream_1", "some value6", [
            ("codec_type", stream_1['codec_type']),
            ("codec_name", stream_1['codec_name']),
        ])

        ET.ElementTree(root).write(metadata_file_name)




# Run the export only when this file is executed as a script, so importing
# the module does not query the database or write files.
if __name__ == "__main__":
    find_id_sort()

回答

2

你的查询难道不应该是:

db.video_md_fcsvr.find({"streams.codec": "prores"}).sort("_id", 1): 

第一个参数是查询条件,第二个参数是投影(projection),用于选择要返回的字段。

+0

可能 - 让我试试 – JRM

+0

谢谢 - 这样就完美了。我想我是盯着代码看得太久,反而视而不见了 – JRM

+0

另外我刚意识到,我调用的是 streams.codec 而不是 streams.codec_name - 我已经修改了脚本 – JRM