2011-05-06 225 views
2

如何在 Python 中修改 Atom/RSS feed?以下是我想用 Python 完成的事情:

  1. 获取一个 feed
  2. 向该 feed 中添加数据
  3. 重新发布修改后的 feed

Feedparser 在解析方面做得很好,但它似乎没有提供从 FeedParserDict 对象生成 XML 文档的方法。

除了自己编写生成(序列化)函数之外,是否有更简单的替代方案?

回答

2

我自己编写了一个 Atom 1.0 序列化器,使用的是 Django 的模板引擎。我特意多花了一些功夫让它尽可能完整,以便其他人也可以重复使用:

import feedparser
from django.template import Context, Template

# NOTE(review): outside a Django project the settings presumably have to be
# configured before a Template can be compiled -- confirm for your setup.

# Parse the source feed into a FeedParserDict.
d = feedparser.parse("http://example.com/feed.xml")
# ... do stuff with d
# Compile the Atom template; the_stuff_below stands for the template text below.
t = Template(the_stuff_below)
# Template.render() expects a Context instance, so wrap the dict-like result
# instead of passing it directly.
output_atom_document = t.render(Context(d))

注意:feedparser 的清理(sanitization)工作只在 feedparser.parse() 中进行,因此之后你自行放入 FeedParserDict 的任何内容,都必须在必要时事先自行清理。

如果你想用它来修改一个feed,记住你必须改变id值(一个是feed,一个是entry),因为它们应该是唯一的。

<?xml version="1.0" encoding="{{ encoding }}"?>
{% comment %}
Atom serializer template for a feedparser result (FeedParserDict).

Context variables read: encoding, feed, entries.
Values added to the parsed result after feedparser.parse() are rendered
as given, so sanitize them beforehand when necessary.
Each contributor is read as a mapping with name / href / email keys.
This comment sits after the XML declaration so that the declaration stays
the first thing in the rendered document.
{% endcomment %}
<feed xmlns="http://www.w3.org/2005/Atom">
    <title type="{{ feed.title_detail.type }}" xml:lang="{{ feed.title_detail.language }}" xml:base="{{ feed.title_detail.base }}">{{ feed.title|escape }}</title>
{% for link in feed.links %}
    <link rel="{{ link.rel }}" type="{{ link.type }}" href="{{ link.href }}" title="{{ link.title }}"/>
{% endfor %}
    <subtitle type="{{ feed.subtitle_detail.type }}" xml:lang="{{ feed.subtitle_detail.language }}" xml:base="{{ feed.subtitle_detail.base }}">{{ feed.subtitle|escape }}</subtitle>
    <rights type="{{ feed.rights_detail.type }}" xml:lang="{{ feed.rights_detail.language }}" xml:base="{{ feed.rights_detail.base }}">{{ feed.rights|escape }}</rights>
    <generator uri="{{ feed.generator_detail.href }}" version="{{ feed.generator_detail.version }}">{{ feed.generator }}</generator>
    <info type="{{ feed.info_detail.type }}" xml:lang="{{ feed.info_detail.language }}" xml:base="{{ feed.info_detail.base }}">{{ feed.info|escape }}</info>
    <updated>{{ feed.updated }}</updated>
    <id>{{ feed.id }}</id>
    <author>
     <name>{{ feed.author }}</name>
     <uri>{{ feed.author_detail.href }}</uri>
     <email>{{ feed.author_detail.email }}</email>
    </author>
{% for contributor in feed.contributors %}
    <contributor>
     <name>{{ contributor.name }}</name>
     <uri>{{ contributor.href }}</uri>
     <email>{{ contributor.email }}</email>
    </contributor>
{% endfor %}
    <image>
     <title>{{ feed.image.title }}</title>
     <url>{{ feed.image.href }}</url>
     <link>{{ feed.image.link }}</link>
     <width>{{ feed.image.width }}</width>
     <height>{{ feed.image.height }}</height>
     <description>{{ feed.image.description }}</description>
    </image>
    <icon>{{ feed.icon }}</icon>
    <!-- not part of Atom 1.0: feed.textinput -->
    <!-- not part of Atom 1.0: feed.cloud -->
    <publisher>
     <name>{{ feed.publisher }}</name>
     <uri>{{ feed.publisher_detail.href }}</uri>
     <email>{{ feed.publisher_detail.email }}</email>
    </publisher>
{% for tag in feed.tags %}
    <tag>
     <term>{{ tag.term }}</term>
     <scheme>{{ tag.scheme }}</scheme>
     <label>{{ tag.label }}</label>
    </tag>
{% endfor %}
    <language>{{ feed.language }}</language>
    <!-- not part of Atom 1.0: feed.license -->
    <!-- not part of Atom 1.0: feed.errorreportsto -->
{% for entry in entries %}
    <entry>
     <title type="{{ entry.title_detail.type }}" xml:lang="{{ entry.title_detail.language }}" xml:base="{{ entry.title_detail.base }}">{{ entry.title|escape }}</title>
    {% for link in entry.links %}
     <link rel="{{ link.rel }}" type="{{ link.type }}" href="{{ link.href }}" title="{{ link.title }}"/>
    {% endfor %}
     <summary type="{{ entry.summary_detail.type }}" xml:lang="{{ entry.summary_detail.language }}" xml:base="{{ entry.summary_detail.base }}">{{ entry.summary|escape }}</summary>
    {% for content in entry.content %}
     <content type="{{ content.type }}" xml:lang="{{ content.language }}" xml:base="{{ content.base }}">{{ content.value|escape }}</content>
    {% endfor %}
     <published>{{ entry.published }}</published>
     <updated>{{ entry.updated }}</updated>
     <created>{{ entry.created }}</created>
     <!-- not part of Atom 1.0: entry.expired -->
     <id>{{ entry.id }}</id>
     <author>
      <name>{{ entry.author }}</name>
      <uri>{{ entry.author_detail.href }}</uri>
      <email>{{ entry.author_detail.email }}</email>
     </author>
    {% for contributor in entry.contributors %}
     <contributor>
      <name>{{ contributor.name }}</name>
      <uri>{{ contributor.href }}</uri>
      <email>{{ contributor.email }}</email>
     </contributor>
    {% endfor %}
    {% for enclosure in entry.enclosures %}
     <link rel="enclosure" href="{{ enclosure.href }}" length="{{ enclosure.length }}" type="{{ enclosure.type }}"/>
    {% endfor %}
     <publisher>
      <name>{{ entry.publisher }}</name>
      <uri>{{ entry.publisher_detail.href }}</uri>
      <email>{{ entry.publisher_detail.email }}</email>
     </publisher>
    {% for tag in entry.tags %}
     <tag>
      <term>{{ tag.term }}</term>
      <scheme>{{ tag.scheme }}</scheme>
      <label>{{ tag.label }}</label>
     </tag>
    {% endfor %}
    <source>
     <author>
      <name>{{ entry.source.author }}</name>
      <uri>{{ entry.source.author_detail.href }}</uri>
      <email>{{ entry.source.author_detail.email }}</email>
     </author>
    {% for contributor in entry.source.contributors %}
     <contributor>
      <name>{{ contributor.name }}</name>
      <uri>{{ contributor.href }}</uri>
      <email>{{ contributor.email }}</email>
     </contributor>
    {% endfor %}
     <icon>{{ entry.source.icon }}</icon>
     <id>{{ entry.source.id }}</id>
    {% for link in entry.source.links %}
     <link rel="{{ link.rel }}" type="{{ link.type }}" href="{{ link.href }}" title="{{ link.title }}"/>
    {% endfor %}
     <logo>{{ entry.source.logo }}</logo>
     <rights type="{{ entry.source.rights_detail.type }}" xml:lang="{{ entry.source.rights_detail.language }}" xml:base="{{ entry.source.rights_detail.base }}">{{ entry.source.rights|escape }}</rights>
     <subtitle type="{{ entry.source.subtitle_detail.type }}" xml:lang="{{ entry.source.subtitle_detail.language }}" xml:base="{{ entry.source.subtitle_detail.base }}">{{ entry.source.subtitle|escape }}</subtitle>
     <title type="{{ entry.source.title_detail.type }}" xml:lang="{{ entry.source.title_detail.language }}" xml:base="{{ entry.source.title_detail.base }}">{{ entry.source.title|escape }}</title>
     <updated>{{ entry.source.updated }}</updated>
    </source>
    <!-- not part of Atom 1.0: entry.comments -->
    <!-- not part of Atom 1.0: entry.license -->
    </entry>
{% endfor %}
    <!-- meaningless: version (this is Atom 1.0) -->
    <!-- meaningless: namespaces (namespace set to "http://www.w3.org/2005/Atom" in the <feed> tag) -->
    <!-- somewhere else: encoding (in the XML declaration) -->
    <!-- meaningless: status (HTTP status) -->
    <!-- meaningless: href (present if server redirect when fetching the original feed) -->
    <!-- meaningless: etag (part of HTTP headers) -->
    <!-- meaningless: modified (part of HTTP headers) -->
    <!-- meaningless: headers (HTTP headers) -->
    <!-- meaningless: bozo (set to 1 if not well-formed XML) -->
    <!-- meaningless: bozo_exception -->
</feed>