From cfea1528ff9704483a73fd751859466eb349868f Mon Sep 17 00:00:00 2001 From: cedricbonhomme Date: Sat, 30 Jan 2010 09:30:42 +0100 Subject: Downloading of informations feeds and the update of base are asynchronous. Use of a mutex to protect the base. You can now refresh the web interface during the downloading of feeds. --- feedgetter.py | 101 +++++++++++++++++++++++++++++++++------------------------- var/feed.lst | 2 ++ 2 files changed, 60 insertions(+), 43 deletions(-) diff --git a/feedgetter.py b/feedgetter.py index 5a560763..a51515fe 100644 --- a/feedgetter.py +++ b/feedgetter.py @@ -23,49 +23,67 @@ feeds_list = [] list_of_threads = [] - - -def process(the_good_url): - """Request the URL - - Executed in a thread. +class FeedGetter(object): """ - feeds_list.append(feedparser.parse(the_good_url)) - -def retrieve_feed(): - """ - Parse the file 'feeds.lst' and launch a thread for each RSS feed. """ - conn = sqlite3.connect("./var/feed.db", isolation_level = None) - c = conn.cursor() - c.execute('''create table if not exists rss_feed - (date text, feed_title text, feed_site_link text, \ - article_title text, article_link text PRIMARY KEY)''') - - for a_feed in feeds_file.readlines(): - # test if the URL is well formed - for url_regexp in url_finders: - if url_regexp.match(a_feed): - the_good_url = url_regexp.match(a_feed).group(0).replace("\n", "") - try: - # launch a new thread for the RSS feed - thread = threading.Thread(None, process, \ - None, (the_good_url,)) - thread.start() - list_of_threads.append(thread) - except: - pass - break - - # wait for all threads are done - for th in list_of_threads: - th.join() - - # when all jobs are done, insert articles in the base - for a_feed in feeds_list: + def __init__(self): + # mutex to protect the SQLite base + self.locker = threading.Lock() + + self.retrieve_feed() + + def retrieve_feed(self): + """ + Parse the file 'feeds.lst' and launch a thread for each RSS feed. + """ + for a_feed in feeds_file.readlines(): + # test if the URL is well formed + for url_regexp in url_finders: + if url_regexp.match(a_feed): + the_good_url = url_regexp.match(a_feed).group(0).replace("\n", "") + try: + # launch a new thread for the RSS feed + thread = threading.Thread(None, self.process, \ + None, (the_good_url,)) + thread.start() + list_of_threads.append(thread) + except: + pass + break + + # wait for all threads are done + for th in list_of_threads: + th.join() + + def process(self, the_good_url): + """Request the URL + + Executed in a thread. + SQLite objects created in a thread can only be used in that same thread ! + """ + self.locker.acquire() + + self.conn = sqlite3.connect("./var/feed.db", isolation_level = None) + self.c = self.conn.cursor() + self.c.execute('''create table if not exists rss_feed + (date text, feed_title text, feed_site_link text, \ + article_title text, article_link text PRIMARY KEY)''') + + # add the articles in the base + self.add_into_sqlite(feedparser.parse(the_good_url)) + + self.conn.commit() + self.c.close() + + self.locker.release() + + def add_into_sqlite(self, a_feed): + """ + Add the articles of the feed 'a_feed' in the SQLite base. + """ for article in a_feed['entries']: try: - c.execute('insert into rss_feed values (?,?,?,?,?)', (\ + self.c.execute('insert into rss_feed values (?,?,?,?,?)', (\ "-".join([str(i) for i in list(article.updated_parsed)]), \ a_feed.feed.title.encode('utf-8'), \ a_feed.feed.link.encode('utf-8'), \ @@ -74,9 +92,6 @@ def retrieve_feed(): except sqlite3.IntegrityError: pass - conn.commit() - c.close() - if __name__ == "__main__": # Point of entry in execution mode @@ -86,4 +101,4 @@ if __name__ == "__main__": print "./feed.lst not found" exit(0) - retrieve_feed() \ No newline at end of file + FeedGetter() \ No newline at end of file diff --git a/var/feed.lst b/var/feed.lst index e525e3d6..56178150 100644 --- a/var/feed.lst +++ b/var/feed.lst @@ -10,3 +10,5 @@ http://theinvisiblethings.blogspot.com/feeds/posts/default http://torvalds-family.blogspot.com/feeds/posts/default http://www.python.org/channews.rdf http://www.kde.org/dotkdeorg.rdf +http://feeds.feedburner.com/internetactu/bcmJ +http://www.april.org/fr/rss.xml -- cgit