#!/usr/bin/python
# Copyright 2007 Alex K (wtwf.com) All rights reserved.

"""Fix an rss feed so that it is flagged as private.

Possibly Expire old items.

Usage: %(PROGRAM)s [-c FILE | --config=FILE] [-f | --force]

  -c FILE, --config=FILE  Python config file to execute before processing
                          (default: ~/.feedFixerrc)
  -f, --force             process every feed even when its destination is
                          already newer than its source
"""

from xml.dom import minidom
import xml.sax.saxutils
import time
import datetime
import getopt
import getpass
import logging
import os
import sys

# NOTE(review): the guard tests for 2.4 while the message says 2.5; both are
# kept as found because the file does not show which one was intended.
if sys.version_info < (2, 4):
    # A bare string is not a valid exception; raise a real exception object.
    raise RuntimeError("You probably need python 2.5 or greater")

BASE_DIR = os.path.expanduser("~")


def usage(code, msg=''):
    """Print the module docstring and an optional message, then exit.

    Args:
      code: process exit status. A non-zero value sends the output to
        stderr, zero sends it to stdout.
      msg: optional extra line printed after the docstring.

    Raises:
      SystemExit: always, carrying `code`.
    """
    if code:
        fd = sys.stderr
    else:
        fd = sys.stdout
    program = os.path.basename(sys.argv[0])
    # __doc__ is None when docstrings are stripped (python -OO) or when this
    # module's docstring is not its first statement; fall back to "".
    fd.write((__doc__ or "") % {"PROGRAM": program} + "\n")
    if msg:
        fd.write(str(msg) + "\n")
    sys.exit(code)


def GetFilename(filename):
    """Return `filename` if that path exists, else the name under BASE_DIR."""
    if os.path.exists(filename):
        return filename
    return os.path.join(BASE_DIR, filename)


class FeedFixer:
    """Describes one feed to rewrite: where it is read from and written to.

    Attributes are set from the constructor arguments of the same name;
    both filenames are resolved through GetFilename() at construction time.
    """

    def __init__(self, source_filename, destination_filename, url=None,
                 add_noindex=True,
                 expire_items_older_than=datetime.timedelta(days=28)):
        """Store the feed settings.

        Args:
          source_filename: feed file to read.
          destination_filename: feed file to produce.
          url: if set, replaces the href of blogger.com rel="self" links
            (see RemoveBloggerFeedLinks).
          add_noindex: flag consulted by Fix().
          expire_items_older_than: age limit (a timedelta) consulted by
            Fix().
        """
        self.source_filename = GetFilename(source_filename)
        self.destination_filename = GetFilename(destination_filename)
        self.add_noindex = add_noindex
        self.expire_items_older_than = expire_items_older_than
        self.url = url

    def IsFresh(self):
        """Return True if the destination exists and is newer than the source.

        Raises:
          OSError: if the destination exists but the source does not.
        """
        destination = self.destination_filename
        return (os.path.exists(destination) and
                os.path.getmtime(destination) >
                os.path.getmtime(self.source_filename))

    def Fix(self):
        """Read the source feed; the rewrite step itself is missing.

        Raises:
          NotImplementedError: always, after the source has been read (see
            the note in the body).
          IOError/OSError: if the source file cannot be read.
        """
        source = self.source_filename
        destination = self.destination_filename
        logging.info("%s -> %s", source, destination)

        source_file = open(source)
        try:
            # read() keeps the text verbatim. The previous
            # "\n".join(readlines()) inserted an extra newline after every
            # line, because readlines() already keeps the line endings.
            txt = source_file.read()
        finally:
            source_file.close()

        # NOTE(review): the remainder of this method did not survive in the
        # copy of the file under review: everything that looked like markup
        # (from a "<" to the next ">") was stripped. The surviving
        # fragments, in order and with their nesting unknown, are:
        #
        #   if self.add_noindex:
        #   if "                      <- rest of the condition lost
        #   ')                        <- tail of a lost call
        #   txt = txt.replace('       <- arguments lost
        #   ... > self.expire_items_older_than:   <- left-hand side lost
        #   title.firstChild.data = "This post is too old and has been removed"
        #   content.firstChild.data = "This post is too old and has been removed"
        #   is_old=True
        #   self.RemoveBloggerFeedLinks(entry, is_old)
        #   return xmldoc.toxml(encoding='utf-8')
        #
        # How `xmldoc`, `entry`, `title` and `content` were obtained, which
        # timestamp was compared, what text was substituted for the noindex
        # flag, and where `destination` was written are all missing. Failing
        # loudly is deliberate: guessing the privacy marker could publish a
        # feed that is wrongly believed to be flagged private.
        raise NotImplementedError(
            "FeedFixer.Fix: the noindex/expiry rewrite is missing from this "
            "copy of the file; restore it from the original source")

    def ParseTime(self, s):
        """Parse the leading "YYYY-MM-DDTHH:MM:SS" of `s` into a datetime.

        Only the first 19 characters are used, so fractional seconds and any
        UTC offset that follow are ignored; the result is a naive datetime.

        Raises:
          ValueError: if the first 19 characters do not match the format.
        """
        return datetime.datetime(
            *time.strptime(s[0:19], "%Y-%m-%dT%H:%M:%S")[0:6])

    def RemoveBloggerFeedLinks(self, ele, is_old=False):
        """Strip out any links that refer to blogger.com/feeds.

        For every <link> element below `ele`:
          * when `is_old` is true, its title attribute is removed;
          * rel="replies" links are removed;
          * links whose href contains "blogger.com/feed" are removed, except
            the rel="self" link, which is kept and pointed at self.url when
            a url is configured.

        Args:
          ele: a minidom node (document or element) to clean in place.
          is_old: whether the enclosing item has been expired.
        """
        for link in ele.getElementsByTagName("link"):
            href = link.getAttribute("href")
            if is_old and link.hasAttribute("title"):
                link.removeAttribute("title")
            rel = link.getAttribute("rel")
            if rel == "replies":
                link.parentNode.removeChild(link)
            elif href and "blogger.com/feed" in href:
                # NOTE(review): the original indentation was lost, so it is
                # not certain which `if` the final `else` belonged to. It is
                # attached here to the rel == "self" test, which is the
                # reading that matches the docstring (blogger feed links are
                # stripped); confirm against the original file.
                if rel == "self":
                    if self.url:
                        link.setAttribute("href", self.url)
                else:
                    link.parentNode.removeChild(link)


def main():
    """Process every feed in FEEDS, optionally after loading a config file.

    Command line options:
      -c FILE / --config=FILE: config file to execute (default
        ~/.feedFixerrc); it is skipped if it does not exist.
      -f / --force: call Fix() even for feeds whose destination is fresh.

    Exits with status 2 (via usage()) on an unrecognised option.
    """
    logging.basicConfig()
    logging.getLogger().setLevel(logging.DEBUG)

    # parse command line options
    try:
        opts, _unused_args = getopt.getopt(
            sys.argv[1:], "c:f", ["config=", "force"])
    except getopt.error:
        # usage() needs an exit code; it was previously called with no
        # arguments, which raised TypeError instead of printing the usage.
        # sys.exc_info() is used rather than "except ... as" so the file
        # still parses on every interpreter the version guard admits.
        usage(2, str(sys.exc_info()[1]))

    # Process options
    config_filename = None
    force = False
    for opt, arg in opts:
        if opt in ("-c", "--config"):
            config_filename = arg
        if opt in ("-f", "--force"):
            force = True

    if config_filename is None:
        config_filename = os.path.expanduser("~/.feedFixerrc")

    if os.path.exists(config_filename):
        logging.info("Loading config file: %s", config_filename)
        config_file = open(config_filename)
        try:
            config_source = config_file.read()
        finally:
            config_file.close()
        # SECURITY: the config file is executed as Python code with this
        # process's privileges; only point this at files you trust.
        # It runs in the module globals so that a FEEDS assignment in the
        # config replaces the default below. The former execfile() call ran
        # it against this function's locals, where rebinding FEEDS had no
        # effect on the global FEEDS that the loop reads.
        exec(compile(config_source, config_filename, "exec"), globals())

    # Entries may be FeedFixer instances or, as in older configurations,
    # plain (source, destination) pairs.
    for feed in FEEDS:
        if not isinstance(feed, FeedFixer):
            source, destination = feed
            feed = FeedFixer(source, destination)
        # do we even want to do anything
        if not force and feed.IsFresh():
            # IsFresh() means the destination is NEWER than the source; the
            # message used to claim the opposite.
            logging.info("Nothing to do %s is newer than %s",
                         feed.destination_filename, feed.source_filename)
            continue
        feed.Fix()


FEEDS = (FeedFixer("atom.clean.xml", "atom.xml",
                   url="http://wtwf.com/tmp/test.xml"),)

if __name__ == '__main__':
    main()