#!/usr/bin/env python
"""
THIS IS DEPRECATED: PLEASE REFER TO CVS FOR THE LATEST VERSION
http://vsbabu.org/tools/viewcvs.cgi/pyblagg/

pyblagg: quick aggregator for Linux/Unix machines.

This is an AGGREGATOR, not a news reader.  Most readers out there
still force me to go to each channel I subscribe to and figure out
what the latest news is.  This collects all the channels, aggregates
the news and then displays the latest news first.

Installation and usage:
 - get Mark Pilgrim's ultraliberal parser - see URL below
 - you need to patch it.  Add the following 4 lines to the class
   RSSParser:
       start_date = start_dc_date
       end_date = end_dc_date
       start_pubdate = start_dc_date
       end_pubdate = end_dc_date
 - get the log4py module - see URL below
 - edit configuration settings in your file ~/.pyblagg
 - give it execute permission
 - run it from the command line periodically.  I don't recommend
   running this from CGI.

TODO:
 - Take the timezone into consideration
 - At the moment, this uses my template.  Need to change it to use
   some nice templating language.
 - It is a bad, bad world out there for RSS/RDF.  Everyone seems to
   have their own definitions.  Make the error checking more graceful.
 - Use pickle or some other persistent module instead of saving
   feeds in files.

NOTE(review): this copy of the file was damaged — an HTML-stripping
pass removed every <...> span, which destroyed the helper functions
(titleOrdesc, parsetime, permalink2localfile), most of the Channel
class, and every HTML template string in the __main__ section.  The
destroyed pieces below are reconstructions, each marked with a
NOTE(review)/TODO(review) comment; restore the authoritative text
from CVS (URL above).
"""
__author__ = "S Babu "
__date__ = "20 September 2002"
__credits__ = """Mark Pilgrim, for liberal RSS parser
http://www.diveintomark.org/archives/2002/08/13.html#ultraliberal_rss_parser
Martin Preishuber, for log4py
http://sourceforge.net/project/showfiles.php?group_id=36216
"""
__version__ = "$Revision: 1.2 $"
#$Source: /home/vsbabu/repository/python/pyblagg/pyblagg.py,v $
#$Log: pyblagg.py,v $
#Revision 1.2  2002/09/23 20:51:33  vsbabu
#Removed setup options from the script to a configuration file. It is parsed using
#ConfigParser module. See the end of the script for sample.

# ---------- start configuration ------------------------
config_file = "/home/vsbabu/.pyblagg"
log_level = 2
# log level can be 0 .. 4 as specified in log4py
# 0 = none, 1 = only errors, 2 = normal, 3 = verbose, 4 = debug
# This is overridden later by the configuration file setting
# ---------- end configuration ------------------------

import rssparser
import os
import time
import re
from log4py import Logger

log4py = Logger().get_instance()
# NOTE(review): the original call was truncated at "set_loglevel(1<" in
# this copy; "1 << log_level" matches the 0..4 power-of-two scheme the
# configuration comment describes — confirm against the log4py docs.
log4py.set_loglevel(1 << log_level)


def titleOrdesc(dict):
    """Return a (title, description) pair for one parsed feed item.

    NOTE(review): reconstructed — the original definition was destroyed
    in this copy.  NewsItem.__init__ unpacks exactly two values from it,
    and the main loop treats a None description as "no description".
    """
    title = dict.get('title')
    description = dict.get('description')
    if not title:
        # No title in the feed: fall back to a prefix of the
        # description (or the link) so every item displays something.
        if description:
            title = description[:40]
            description = None
        else:
            title = dict.get('link', '')
    return title, description


def parsetime(datestr):
    """Parse a dc:date / ISO-8601 timestamp into local epoch seconds.

    NOTE(review): reconstructed.  The timezone suffix is ignored — see
    the module TODO.  Returns None when the string cannot be parsed,
    which makes the main loop fall back to file/system time.
    """
    m = re.match(r'(\d{4})-(\d{2})-(\d{2})[T ](\d{2}):(\d{2})(?::(\d{2}))?',
                 datestr)
    if m is None:
        return None
    y, mo, d, h, mi, s = [int(x or 0) for x in m.groups()]
    return time.mktime((y, mo, d, h, mi, s, 0, 0, -1))


def permalink2localfile(link):
    """Map a permalink URL to a safe, flat local cache filename.

    NOTE(review): reconstructed — every character that is unsafe in a
    filename is replaced so items cache as flat files under datadir.
    """
    return re.sub(r'[^A-Za-z0-9]', '_', link)


class Channel:
    """One subscribed feed: its URL, display name and parsed items.

    NOTE(review): reconstructed except for the tail of htmlLink(),
    which survived in the damaged copy and fixes the attribute names
    (self.link / self.description / self.title) used here.
    """

    def __init__(self, link, name=None):
        self.link = link
        # Display name from the config file; falls back to the URL
        # until loadChannel() supplies the feed's own title.
        self.title = name or link
        self.description = None
        self.items = []

    def loadChannel(self):
        """Download and parse the feed; fills title/description/items.

        Network/parse errors propagate to the caller, which logs and
        skips the channel (best-effort aggregation).
        """
        data = rssparser.parse(self.link)
        channel = data.get('channel', {})
        if channel.get('title'):
            self.title = channel['title']
        self.description = channel.get('description')
        self.items = [NewsItem(item) for item in data.get('items', [])]

    def htmlLink(self):
        """Return an HTML anchor for this channel.

        With a description available it becomes the link's tooltip;
        the format arguments below are the surviving original code.
        """
        if self.description is not None:
            return """<a href="%s" title="%s">%s</a>""" % \
                   (self.link, self.description, self.title)
        else:
            return """<a href="%s">%s</a>""" % (self.link, self.title)


class NewsItem:
    """Each item in a channel.

    Wraps one item dict from rssparser: permalink, title/description
    pair, optional parsed date, and the local cache filename.
    """
    def __init__(self, dict):
        # 'link' is mandatory; a feed item without it raises KeyError
        # and the whole channel is skipped by the caller's handler.
        self.link = dict['link']
        self.title, self.description = titleOrdesc(dict)
        if 'date' in dict.keys():
            self.date = parsetime(dict['date'])
        else:
            # No date supplied: the main loop substitutes the cached
            # file's mtime, or the current system time.
            self.date = None
        self.filenm = permalink2localfile(self.link)


if __name__ == '__main__':
    log4py.info("startup")
    log4py.info("get configuration options from " + config_file)
    from ConfigParser import ConfigParser
    config = ConfigParser()
    # Close the handle instead of leaking it (original did a bare
    # config.readfp(open(config_file))).
    fp = open(config_file)
    try:
        config.readfp(fp)
    finally:
        fp.close()
    datadir = config.get("DEFAULT", "data_directory")
    output = config.get("DEFAULT", "output_html_file")
    blog_items_to_show = config.getint("DEFAULT", "blog_items_to_show")
    log_level = config.getint("DEFAULT", "log_level")
    dirlist_rev_sort = config.get("DEFAULT", "directory_command")
    # Apply the override the configuration-section comment promises;
    # the original only reassigned the variable without re-applying it.
    # TODO(review): confirm the 1 << level mapping against log4py.
    log4py.set_loglevel(1 << log_level)
    log4py.info("got default configuration options")
    log4py.debug("Data directory = " + datadir)
    log4py.debug("Output file = " + output)
    log4py.debug("Number of items shown = %d" % blog_items_to_show)

    # Each non-DEFAULT section name is a feed URL; the optional "name"
    # option is its display name.
    blogroll = {}
    for blog in config.sections():
        if config.has_option(blog, "name"):
            blogroll[blog] = config.get(blog, "name")
        else:
            blogroll[blog] = None
    log4py.info("got blog information")

    # let us first update our mini database
    channels = []
    for blog in blogroll.keys():
        log4py.info("creating channel [%s]" % blog)
        channel = Channel(blog, blogroll[blog])
        try:
            channel.loadChannel()
        except Exception:
            # Narrowed from a bare except: best-effort — log the
            # failed feed and keep aggregating the rest.
            log4py.error("Could not download channel => " + blog)
            continue
        channels.append(channel)
    log4py.info("collected all channels")

    log4py.info("start processing channels")
    blogroll_string = ""
    for channel in channels:
        log4py.debug("\t channel:" + channel.title)
        # TODO(review): the original separator markup between blogroll
        # links was stripped from this copy; <br/> is a placeholder.
        blogroll_string = blogroll_string + channel.htmlLink() + "<br/>"
        for item in channel.items:
            filenm = datadir + os.sep + item.filenm
            log4py.debug("\t\t" + item.title[:20] + "[" + filenm + "]")
            if item.date is None:
                if os.path.exists(filenm):
                    # index 8 of os.stat() = st_mtime
                    item.date = os.stat(filenm)[8]
                    log4py.warn("[%s]%s - had to use existing file time" %
                                (channel.title, item.title[:20]))
                else:
                    item.date = time.mktime(time.localtime())
                    log4py.warn("[%s]%s - had to use system time" %
                                (channel.title, item.title[:20]))
            # this is probably a bad idea to update it always. then again,
            # people don't put 1001 things on their rdf's
            # TODO(review): anchor markup reconstructed; the surviving
            # code formats (item.link, item.title) into the template.
            entry = """<a href="%s">%s</a>\n""" % (item.link, item.title)
            if item.description is not None:
                entry = entry + "\n" + item.description
            # TODO(review): trailing markup reconstructed (tag stripped).
            entry = entry + '<br/>\n'
            try:
                fp = open(filenm, "w")
                fp.write(entry)
                fp.close()
            except IOError:
                # Original passed filenm as a stray second positional
                # argument; concatenate so the filename appears in the
                # logged message.
                log4py.error("Could not save => " + filenm)
                continue
            # Stamp the cache file with the item date so a plain
            # "ls -t" yields newest-first ordering below.
            if item.date is not None:
                os.utime(filenm, (item.date, item.date))
    log4py.info("finished processing channels")

    log4py.info("preparing output file " + output)
    ofp = open(output, "w")
    # TODO(review): the original page-header template was destroyed in
    # this copy; this is a minimal placeholder — restore from CVS.
    ofp.write("""<html><head><title>pyblagg</title></head><body>\n""")
    log4py.info("getting channel data")
    # directory_command (e.g. "ls -t") lists the cache newest-first;
    # it comes from the user's own trusted config file, so os.popen
    # with it is acceptable here.
    aggritems = os.popen(dirlist_rev_sort + ' ' +
                         datadir).readlines()[:blog_items_to_show]
    for blogitem in aggritems:
        blogitem = datadir + os.sep + blogitem.strip()
        blogdate = time.strftime("%Y.%m.%d %I:%M",
                                 time.localtime(os.stat(blogitem)[8]))
        # TODO(review): per-item markup reconstructed; the original
        # (which embedded blogdate) was destroyed in this copy.
        ofp.write("<p><em>" + blogdate + "</em><br/>" +
                  open(blogitem, "r").read() + "</p>\n")
    # TODO(review): footer template reconstructed; the surviving code
    # shows it took two %s slots — generation time and the blogroll.
    ofp.write("""<hr/><p>Generated %s</p><p>%s</p></body></html>\n""" % (
        time.strftime("%B %d, %Y %I:%M %p", time.localtime()),
        blogroll_string))
    ofp.close()
    log4py.info("finished making output " + output)

#---------------- sample config file ------------------------
#[DEFAULT]
#data_directory = /home/vsbabu/software/pyblagg/data
#output_html_file = /home/vsbabu/www/mt/feeds/index.html
#blog_items_to_show = 60
#directory_command = ls -t
#log_level = 2
#
#[http://www.diveintomark.org/xml/rss.xml]
#name = Mark Pilgrim
#
#[http://www.brunningonline.net/simon/blog/index.rdf]
#name = Simon Brunning
#
#[http://vsbabu.org/tharunya/blog/index.rdf]
#name = Tharunya Bhasker