about summary refs log tree commit diff
path: root/feedgetter.py
diff options
context:
space:
mode:
Diffstat (limited to 'feedgetter.py')
-rwxr-xr-x feedgetter.py 110
1 files changed, 48 insertions, 62 deletions
diff --git a/feedgetter.py b/feedgetter.py
index aa463b7d..e3469132 100755
--- a/feedgetter.py
+++ b/feedgetter.py
@@ -27,14 +27,15 @@ __license__ = "GPLv3"
import os.path
import traceback
-import sqlite3
import threading
import feedparser
+import hashlib
from BeautifulSoup import BeautifulSoup
from datetime import datetime
import utils
+import mongodb
feeds_list = []
list_of_threads = []
@@ -50,11 +51,8 @@ class FeedGetter(object):
"""
Initializes the base and variables.
"""
- # Create the base if not exists.
- utils.create_base()
-
- # mutex to protect the SQLite base
- self.locker = threading.Lock()
+ # MongoDB connections
+ self.articles = mongodb.Articles()
def retrieve_feed(self):
"""
@@ -84,25 +82,12 @@ class FeedGetter(object):
"""Request the URL
Executed in a thread.
- SQLite objects created in a thread can only be used in that same thread !
"""
- # Protect this part of code.
- self.locker.acquire()
-
- self.conn = sqlite3.connect(utils.sqlite_base, isolation_level = None)
- self.c = self.conn.cursor()
-
if utils.detect_url_errors([the_good_url]) == []:
# if ressource is available add the articles in the base.
- self.add_into_sqlite(the_good_url)
-
- self.conn.commit()
- self.c.close()
+ self.add_into_database(the_good_url)
- # Release this part of code.
- self.locker.release()
-
- def add_into_sqlite(self, feed_link):
+ def add_into_database(self, feed_link):
"""
Add the articles of the feed 'a_feed' in the SQLite base.
"""
@@ -113,16 +98,23 @@ class FeedGetter(object):
feed_image = a_feed.feed.image.href
except:
feed_image = "/img/feed-icon-28x28.png"
- try:
- self.c.execute('insert into feeds values (?,?,?,?,?)', (\
- utils.clear_string(a_feed.feed.title.encode('utf-8')), \
- a_feed.feed.link.encode('utf-8'), \
- feed_link, \
- feed_image,
- "0"))
- except sqlite3.IntegrityError:
- # feed already in the base
- pass
+
+ sha1_hash = hashlib.sha1()
+ sha1_hash.update(feed_link.encode('utf-8'))
+ feed_id = sha1_hash.hexdigest()
+
+ collection_dic = {"feed_id": feed_id, \
+ "type": 0, \
+ "feed_image": feed_image, \
+ "feed_title": utils.clear_string(a_feed.feed.title.encode('utf-8')), \
+ "feed_link": feed_link, \
+ "site_link": a_feed.feed.link.encode('utf-8'), \
+ "mail": False \
+ }
+
+ self.articles.add_collection(collection_dic)
+
+ articles = []
for article in a_feed['entries']:
description = ""
try:
@@ -142,37 +134,31 @@ class FeedGetter(object):
except:
post_date = datetime(*article.published_parsed[:6])
- try:
- # try. Will only success if the article is not already in the data base
- self.c.execute('insert into articles values (?, ?, ?, ?, ?, ?, ?)', (\
- post_date, \
- article_title, \
- article.link.encode('utf-8'), \
- description, \
- "0", \
- feed_link, \
- "0"))
- result = self.c.execute("SELECT mail from feeds WHERE feed_site_link='" + \
- a_feed.feed.link.encode('utf-8') + "'").fetchall()
- if result[0][0] == "1":
- # if subscribed to the current feed
- # send the article by e-mail
- try:
- threading.Thread(None, utils.send_mail, None, (utils.mail_from, utils.mail_to, \
- a_feed.feed.title.encode('utf-8'), \
- article_title, description) \
- ).start()
- except Exception, e:
- # SMTP acces denied, to many SMTP connections, etc.
- top = traceback.extract_stack()[-1]
- print ", ".join([type(e).__name__, os.path.basename(top[0]), str(top[1])])
- except sqlite3.IntegrityError:
- # article already in the data base
- pass
- except Exception, e:
- # Missing information (updated_parsed, ...)
- top = traceback.extract_stack()[-1]
- print ", ".join([type(e).__name__, os.path.basename(top[0]), str(top[1]), str(traceback.extract_stack()[-2][3])])
+
+ sha1_hash = hashlib.sha1()
+ sha1_hash.update(article.link.encode('utf-8'))
+ article_id = sha1_hash.hexdigest()
+
+ article = {"article_id": article_id, \
+ "type":1, \
+ "article_date": post_date, \
+ "article_link": article.link.encode('utf-8'), \
+ "article_title": article_title, \
+ "article_content": description, \
+ "article_readed": False, \
+ "article_like": False \
+ }
+
+ articles.append(article)
+
+ self.articles.add_articles(articles, feed_id)
+
+ # send new articles by e-mail if desired.
+ #threading.Thread(None, utils.send_mail, None, (utils.mail_from, utils.mail_to, \
+ #a_feed.feed.title.encode('utf-8'), \
+ #article_title, description) \
+ #).start()
+
if __name__ == "__main__":
bgstack15