#!/usr/bin/python
# Copyright 2007 Alex K (wtwf.com) All rights reserved.
"""Fix an rss feed so that it is flagged as private.
Possibly Expire old items.
"""
from xml.dom import minidom
import xml.sax.saxutils
import time
import datetime
import getopt
import getpass
import logging
import os
import sys
# Refuse to run on interpreters that are too old.  A real exception class is
# raised here: string exceptions (raise "text") are rejected with a TypeError
# by Python 2.6 and later, which would hide the intended message.
# NOTE(review): the check allows 2.4 although the message mentions 2.5; the
# original bound is kept -- confirm which version is really required.
if sys.version_info < (2, 4):
    raise RuntimeError("You probably need python 2.5 or greater")

# Fallback directory: GetFilename() joins a filename onto BASE_DIR when the
# filename does not exist as given.
BASE_DIR = os.path.expanduser("~")
def usage(code, msg=''):
    """Print the module docstring, an optional message, and exit.

    Args:
      code: process exit status.  A true (non-zero) value sends the output
        to stderr, a false value sends it to stdout.
      msg: optional extra text (or exception) written on its own line after
        the docstring.

    Raises:
      SystemExit: always, carrying `code`.
    """
    fd = sys.stderr if code else sys.stdout
    # PROGRAM is deliberately a local: the docstring is formatted with
    # locals(), so it may refer to the script name as %(PROGRAM)s.
    PROGRAM = os.path.basename(sys.argv[0])
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty string instead of failing with a TypeError.
    fd.write("%s\n" % ((__doc__ or "") % locals(),))
    if msg:
        # The one-element tuple keeps this safe for any msg object.
        fd.write("%s\n" % (msg,))
    sys.exit(code)
def GetFilename(filename):
    """Resolve a feed filename.

    Returns `filename` unchanged when that path exists; otherwise returns
    `filename` joined onto BASE_DIR.
    """
    if not os.path.exists(filename):
        return os.path.join(BASE_DIR, filename)
    return filename
class FeedFixer:
def __init__(self, source_filename, destination_filename,
             url=None,
             add_noindex=True,
             expire_items_older_than=datetime.timedelta(days=28)):
    """Record the settings for fixing one feed.

    Args:
      source_filename: feed file to read; resolved through GetFilename().
      destination_filename: output feed file; resolved through
        GetFilename().
      url: when set, RemoveBloggerFeedLinks() writes it into the href of a
        rel="self" link that points at blogger.com/feed.
      add_noindex: flag consulted by Fix().
      expire_items_older_than: datetime.timedelta age limit consulted by
        Fix(); defaults to 28 days.
    """
    self.url = url
    self.add_noindex = add_noindex
    self.expire_items_older_than = expire_items_older_than
    # Both names go through GetFilename(), source first.
    self.source_filename = GetFilename(source_filename)
    self.destination_filename = GetFilename(destination_filename)
def IsFresh(self):
    """Tell whether the destination file is already up to date.

    Returns:
      True only when the destination file exists and its modification time
      is strictly later than the source file's; False otherwise.
    """
    target = self.destination_filename
    if not os.path.exists(target):
        return False
    return os.path.getmtime(target) > os.path.getmtime(self.source_filename)
def Fix(self):
"""Read the source feed and rewrite it.

Logs the source and destination paths and reads the whole source file
into memory.  NOTE(review): this copy of the method is damaged -- see the
note in the body.  Everything after the add_noindex check is only partly
visible, so the description of those steps below is hedged.
"""
source = self.source_filename
destination = self.destination_filename
logging.info("%s -> %s", source, destination)
source_file = open(source)
# readlines() keeps each line's own terminator, so joining with "\n" puts an
# additional newline between consecutive lines.
txt = "\n".join(source_file.readlines())
source_file.close()
if self.add_noindex:
# NOTE(review): the next two lines are not valid Python as they stand: the
# string literals are unterminated and text is missing between
# `txt.replace('` and `self.expire_items_older_than:`.  Presumably markup
# inside the literals was stripped when this file was extracted -- restore
# them from the original source.  The names title, content, entry and xmldoc
# used below are not defined anywhere in the visible text, and the statements
# after the gap may even belong to a different method.  TODO confirm.
if "')
txt = txt.replace(' self.expire_items_older_than:
# Overwrite the title and content text with a removal notice; apparently done
# for entries that exceed self.expire_items_older_than (condition truncated).
title.firstChild.data = "This post is too old and has been removed"
content.firstChild.data = "This post is too old and has been removed"
is_old=True
# is_old is forwarded so that RemoveBloggerFeedLinks() also drops the title
# attribute from the links it visits.
self.RemoveBloggerFeedLinks(entry, is_old)
# Serialize the document as UTF-8 encoded XML.
return xmldoc.toxml(encoding='utf-8')
def ParseTime(self, s):
    """Convert a timestamp string into a naive datetime.

    Only the first 19 characters of `s` are parsed, using the layout
    YYYY-MM-DDTHH:MM:SS; anything after them (fractional seconds, UTC
    offset) is ignored.

    Raises:
      ValueError: from time.strptime when the prefix does not match.
    """
    prefix = s[:19]
    parsed = time.strptime(prefix, "%Y-%m-%dT%H:%M:%S")
    year, month, day, hour, minute, second = parsed[:6]
    return datetime.datetime(year, month, day, hour, minute, second)
def RemoveBloggerFeedLinks(self, ele, is_old=False):
    """Strip out any links that refer to blogger.com/feeds.

    Visits every <link> element below `ele`:
      * when `is_old` is true, a title attribute on the link is removed;
      * a link with rel="replies" is removed;
      * a link whose href contains "blogger.com/feed" is removed, except
        for rel="self", which is kept and has its href replaced by
        self.url when self.url is set.

    Args:
      ele: DOM element whose descendant <link> elements are processed.
      is_old: whether to also drop title attributes from the links.
    """
    for node in ele.getElementsByTagName("link"):
        target = node.getAttribute("href")
        if is_old and node.hasAttribute("title"):
            node.removeAttribute("title")
        relation = node.getAttribute("rel")

        if relation == "replies":
            node.parentNode.removeChild(node)
            continue
        if not (target and "blogger.com/feed" in target):
            # Not a blogger feed link: leave it in place.
            continue
        if relation != "self":
            node.parentNode.removeChild(node)
        elif self.url:
            node.setAttribute("href", self.url)
def main():
    """Command-line entry point: fix every configured feed.

    Options:
      -c FILE, --config=FILE  configuration file to execute
                              (default: ~/.feedFixerrc)
      -f, --force             fix feeds even when IsFresh() reports that the
                              destination is already newer than the source

    The configuration file is Python source.  It is executed in this
    module's global namespace so that a FEEDS list it defines is the one
    processed below.  Each FEEDS item is either a FeedFixer or a
    (source, destination) pair.
    """
    logging.basicConfig()
    logging.getLogger().setLevel(logging.DEBUG)

    # Parse command line options; a bad option prints the help text plus the
    # getopt error and exits with status 2.
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "c:f", ["config=", "force"])
    except getopt.error as msg:
        usage(2, msg)

    # Process options
    config_filename = None
    force = False
    for opt, arg in opts:
        if opt in ("-c", "--config"):
            config_filename = arg
        if opt in ("-f", "--force"):
            force = True

    if config_filename is None:
        config_filename = os.path.expanduser("~/.feedFixerrc")

    if os.path.exists(config_filename):
        logging.info("Loading config file: %s", config_filename)
        # NOTE: this runs arbitrary Python from the config file, so the file
        # must be trusted.  It is executed against globals() on purpose:
        # executing it against this function's locals would discard its
        # assignments, and the loop below reads the global FEEDS.
        with open(config_filename) as config_file:
            config_source = config_file.read()
        exec(compile(config_source, config_filename, "exec"), globals())

    # The old configuration used to define a list called feeds of
    # sources and destinations
    for feed in FEEDS:
        if not isinstance(feed, FeedFixer):
            source, destination = feed
            feed = FeedFixer(source, destination)
        # do we even want to do anything
        if not force and feed.IsFresh():
            logging.info("Nothing to do %s is newer than %s",
                         feed.destination_filename, feed.source_filename)
            continue
        feed.Fix()
# Feeds processed by main().  Each item is either a FeedFixer instance or a
# (source, destination) pair.  The FeedFixer below is constructed when the
# module is loaded.
FEEDS = (
    FeedFixer(
        "atom.clean.xml",
        "atom.xml",
        url="http://wtwf.com/tmp/test.xml",
    ),
)

# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    main()