diff options
Diffstat (limited to 'feedgetter.py')
-rwxr-xr-x | feedgetter.py | 110 |
1 files changed, 48 insertions, 62 deletions
diff --git a/feedgetter.py b/feedgetter.py index aa463b7d..e3469132 100755 --- a/feedgetter.py +++ b/feedgetter.py @@ -27,14 +27,15 @@ __license__ = "GPLv3" import os.path import traceback -import sqlite3 import threading import feedparser +import hashlib from BeautifulSoup import BeautifulSoup from datetime import datetime import utils +import mongodb feeds_list = [] list_of_threads = [] @@ -50,11 +51,8 @@ class FeedGetter(object): """ Initializes the base and variables. """ - # Create the base if not exists. - utils.create_base() - - # mutex to protect the SQLite base - self.locker = threading.Lock() + # MongoDB connections + self.articles = mongodb.Articles() def retrieve_feed(self): """ @@ -84,25 +82,12 @@ class FeedGetter(object): """Request the URL Executed in a thread. - SQLite objects created in a thread can only be used in that same thread ! """ - # Protect this part of code. - self.locker.acquire() - - self.conn = sqlite3.connect(utils.sqlite_base, isolation_level = None) - self.c = self.conn.cursor() - if utils.detect_url_errors([the_good_url]) == []: # if ressource is available add the articles in the base. - self.add_into_sqlite(the_good_url) - - self.conn.commit() - self.c.close() + self.add_into_database(the_good_url) - # Release this part of code. - self.locker.release() - - def add_into_sqlite(self, feed_link): + def add_into_database(self, feed_link): """ Add the articles of the feed 'a_feed' in the SQLite base. """ @@ -113,16 +98,23 @@ class FeedGetter(object): feed_image = a_feed.feed.image.href except: feed_image = "/img/feed-icon-28x28.png" - try: - self.c.execute('insert into feeds values (?,?,?,?,?)', (\ - utils.clear_string(a_feed.feed.title.encode('utf-8')), \ - a_feed.feed.link.encode('utf-8'), \ - feed_link, \ - feed_image, - "0")) - except sqlite3.IntegrityError: - # feed already in the base - pass + + sha1_hash = hashlib.sha1() + sha1_hash.update(feed_link.encode('utf-8')) + feed_id = sha1_hash.hexdigest() + + collection_dic = {"feed_id": feed_id, \ + "type": 0, \ + "feed_image": feed_image, \ + "feed_title": utils.clear_string(a_feed.feed.title.encode('utf-8')), \ + "feed_link": feed_link, \ + "site_link": a_feed.feed.link.encode('utf-8'), \ + "mail": False \ + } + + self.articles.add_collection(collection_dic) + + articles = [] for article in a_feed['entries']: description = "" try: @@ -142,37 +134,31 @@ class FeedGetter(object): except: post_date = datetime(*article.published_parsed[:6]) - try: - # try. Will only success if the article is not already in the data base - self.c.execute('insert into articles values (?, ?, ?, ?, ?, ?, ?)', (\ - post_date, \ - article_title, \ - article.link.encode('utf-8'), \ - description, \ - "0", \ - feed_link, \ - "0")) - result = self.c.execute("SELECT mail from feeds WHERE feed_site_link='" + \ - a_feed.feed.link.encode('utf-8') + "'").fetchall() - if result[0][0] == "1": - # if subscribed to the current feed - # send the article by e-mail - try: - threading.Thread(None, utils.send_mail, None, (utils.mail_from, utils.mail_to, \ - a_feed.feed.title.encode('utf-8'), \ - article_title, description) \ - ).start() - except Exception, e: - # SMTP acces denied, to many SMTP connections, etc. - top = traceback.extract_stack()[-1] - print ", ".join([type(e).__name__, os.path.basename(top[0]), str(top[1])]) - except sqlite3.IntegrityError: - # article already in the data base - pass - except Exception, e: - # Missing information (updated_parsed, ...) - top = traceback.extract_stack()[-1] - print ", ".join([type(e).__name__, os.path.basename(top[0]), str(top[1]), str(traceback.extract_stack()[-2][3])]) + + sha1_hash = hashlib.sha1() + sha1_hash.update(article.link.encode('utf-8')) + article_id = sha1_hash.hexdigest() + + article = {"article_id": article_id, \ + "type":1, \ + "article_date": post_date, \ + "article_link": article.link.encode('utf-8'), \ + "article_title": article_title, \ + "article_content": description, \ + "article_readed": False, \ + "article_like": False \ + } + + articles.append(article) + + self.articles.add_articles(articles, feed_id) + + # send new articles by e-mail if desired. + #threading.Thread(None, utils.send_mail, None, (utils.mail_from, utils.mail_to, \ + #a_feed.feed.title.encode('utf-8'), \ + #article_title, description) \ + #).start() + if __name__ == "__main__": |