From f16b4d8cf84a9c1632eec0f2819fe96ca7d698b7 Mon Sep 17 00:00:00 2001 From: cedricbonhomme Date: Sat, 3 Mar 2012 13:52:36 +0100 Subject: feedgetter.py now uses MongoDB database? --- feedgetter.py | 77 ++++++++++++++++++++++++++++------------------------------- mongodb.py | 29 +++++++++++----------- 2 files changed, 51 insertions(+), 55 deletions(-) diff --git a/feedgetter.py b/feedgetter.py index aa463b7d..cc4fb5ca 100755 --- a/feedgetter.py +++ b/feedgetter.py @@ -35,6 +35,7 @@ from BeautifulSoup import BeautifulSoup from datetime import datetime import utils +import mongodb feeds_list = [] list_of_threads = [] @@ -53,6 +54,9 @@ class FeedGetter(object): # Create the base if not exists. utils.create_base() + # MongoDB connections + self.articles = mongodb.Articles() + # mutex to protect the SQLite base self.locker = threading.Lock() @@ -113,16 +117,18 @@ class FeedGetter(object): feed_image = a_feed.feed.image.href except: feed_image = "/img/feed-icon-28x28.png" - try: - self.c.execute('insert into feeds values (?,?,?,?,?)', (\ - utils.clear_string(a_feed.feed.title.encode('utf-8')), \ - a_feed.feed.link.encode('utf-8'), \ - feed_link, \ - feed_image, - "0")) - except sqlite3.IntegrityError: - # feed already in the base - pass + + collection_dic = {"collection_id": feed_link,\ + "feed_image": feed_image, \ + "feed_title": utils.clear_string(a_feed.feed.title.encode('utf-8')), \ + "feed_link": feed_link, \ + "site_title": a_feed.feed.link.encode('utf-8'), \ + "mail": False \ + } + + self.articles.add_collection(collection_dic) + + articles = [] for article in a_feed['entries']: description = "" try: @@ -142,37 +148,26 @@ class FeedGetter(object): except: post_date = datetime(*article.published_parsed[:6]) - try: - # try. Will only success if the article is not already in the data base - self.c.execute('insert into articles values (?, ?, ?, ?, ?, ?, ?)', (\ - post_date, \ - article_title, \ - article.link.encode('utf-8'), \ - description, \ - "0", \ - feed_link, \ - "0")) - result = self.c.execute("SELECT mail from feeds WHERE feed_site_link='" + \ - a_feed.feed.link.encode('utf-8') + "'").fetchall() - if result[0][0] == "1": - # if subscribed to the current feed - # send the article by e-mail - try: - threading.Thread(None, utils.send_mail, None, (utils.mail_from, utils.mail_to, \ - a_feed.feed.title.encode('utf-8'), \ - article_title, description) \ - ).start() - except Exception, e: - # SMTP acces denied, to many SMTP connections, etc. - top = traceback.extract_stack()[-1] - print ", ".join([type(e).__name__, os.path.basename(top[0]), str(top[1])]) - except sqlite3.IntegrityError: - # article already in the data base - pass - except Exception, e: - # Missing information (updated_parsed, ...) - top = traceback.extract_stack()[-1] - print ", ".join([type(e).__name__, os.path.basename(top[0]), str(top[1]), str(traceback.extract_stack()[-2][3])]) + + article = {"article_id": article.link.encode('utf-8'), \ + "article_date": post_date, \ + "article_link": article.link.encode('utf-8'), \ + "article_title": article_title, \ + "article_content": description, \ + "article_readed": False, \ + "article_like": False \ + } + + articles.append(article) + + self.articles.add_articles(articles, feed_link) + + # send new articles by e-mail if desired. + #threading.Thread(None, utils.send_mail, None, (utils.mail_from, utils.mail_to, \ + #a_feed.feed.title.encode('utf-8'), \ + #article_title, description) \ + #).start() + if __name__ == "__main__": diff --git a/mongodb.py b/mongodb.py index 43ed851e..42aef4bb 100644 --- a/mongodb.py +++ b/mongodb.py @@ -26,6 +26,8 @@ class Articles(object): """ Creates a new collection for a new feed. """ + new_collection["type"] = 0 + name = str(new_collection["collection_id"]) pymongo.collection.Collection(self.db, name) collection = self.db[name] @@ -38,6 +40,7 @@ class Articles(object): """ collection = self.db[str(collection_id)] for article in articles: + article["type"] = 1 cursor = collection.find({"article_id":article["article_id"]}) if cursor.count() == 0: collection.insert(article) @@ -48,8 +51,8 @@ class Articles(object): """ articles = [] collections = self.db.collection_names() - for colliection in collections: - collection = self.db.collection_id + for collection_name in collections: + collection = self.db[collection_name] articles.append(collection) return articles @@ -69,6 +72,7 @@ class Articles(object): print "Article for the collection", collection_id for d in cursor: print d + print def nb_users(self): """ @@ -98,8 +102,7 @@ if __name__ == "__main__": # Create a collection for a stream - collection_dic = {"type": 0, \ - "collection_id": 42,\ + collection_dic = {"collection_id": 42,\ "feed_image": "Image", \ "feed_title": "Title", \ "feed_link": "Link", \ @@ -107,13 +110,12 @@ if __name__ == "__main__": "mail": True, \ } - articles.add_collection(collection_dic) + #articles.add_collection(collection_dic) # Add an article in the newly created collection - article_dic1 = {"type": 1, \ - "article_id": 51, \ + article_dic1 = {"article_id": 51, \ "article_date": "Today", \ "article_link": "Link of the article", \ "article_title": "The title", \ @@ -122,8 +124,7 @@ if __name__ == "__main__": "article_like": True \ } - article_dic2 = {"type": 1, \ - "article_id": 52, \ + article_dic2 = {"article_id": 52, \ "article_date": "Yesterday", \ "article_link": "Link", \ "article_title": "Hello", \ @@ -132,15 +133,15 @@ if __name__ == "__main__": "article_like": True \ } - articles.add_articles([article_dic1, article_dic2], 42) + #articles.add_articles([article_dic1, article_dic2], 42) # Print articles of the collection - articles.print_articles_from_collection(42) + articles.print_articles_from_collection("http://esr.ibiblio.org/?feed=rss2") - print - print articles.list_collections() + print "All articles:" + #print articles.get_all_articles() # Drop the database - articles.drop_database() \ No newline at end of file + #articles.drop_database() \ No newline at end of file -- cgit