diff options
author | cedricbonhomme <devnull@localhost> | 2010-02-24 21:57:33 +0100 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2010-02-24 21:57:33 +0100 |
commit | d6c659b9d7fb04969b466fda28a546b154954a89 (patch) | |
tree | eec875df6749771832d20ff514e8b27e3e6706ee | |
parent | It is now possible to share articles with delicious, Digg, reddit, Scoopeo an... (diff) | |
download | newspipe-d6c659b9d7fb04969b466fda28a546b154954a89.tar.gz newspipe-d6c659b9d7fb04969b466fda28a546b154954a89.tar.bz2 newspipe-d6c659b9d7fb04969b466fda28a546b154954a89.zip |
Major enhancements. Smarter management of the database. Images of feeds are now retrieved.
-rw-r--r-- | feedgetter.py | 34 | ||||
-rw-r--r-- | pyAggr3g470r.py | 41 | ||||
-rw-r--r-- | utils.py | 69 |
3 files changed, 88 insertions, 56 deletions
diff --git a/feedgetter.py b/feedgetter.py index 631b6374..7cd0a812 100644 --- a/feedgetter.py +++ b/feedgetter.py @@ -39,11 +39,13 @@ class FeedGetter(object): sqlite3.register_adapter(str, lambda s : s.decode('utf-8')) self.conn = sqlite3.connect("./var/feed.db", isolation_level = None) self.c = self.conn.cursor() - self.c.execute('''create table if not exists rss_feed + self.c.execute('''create table if not exists feeds + (feed_title text, feed_site_link text PRIMARY KEY, \ + feed_link text, feed_image_link text)''') + self.c.execute('''create table if not exists articles (article_date text, article_title text, \ article_link text PRIMARY KEY, article_description text, \ - feed_title text, feed_site_link text, \ - article_readed text)''') + article_readed text, feed_link text)''') self.conn.commit() self.c.close() @@ -87,7 +89,7 @@ class FeedGetter(object): self.c = self.conn.cursor() # Add the articles in the base. - self.add_into_sqlite(feedparser.parse(the_good_url)) + self.add_into_sqlite(the_good_url) self.conn.commit() self.c.close() @@ -95,10 +97,25 @@ class FeedGetter(object): # Release this part of code. self.locker.release() - def add_into_sqlite(self, a_feed): + def add_into_sqlite(self, feed_link): """ Add the articles of the feed 'a_feed' in the SQLite base. 
""" + a_feed = feedparser.parse(feed_link) + if a_feed['entries'] == []: + return + try: + feed_image = a_feed.feed.image.href + except: + feed_image = "" + try: + self.c.execute('insert into feeds values (?,?,?,?)', (\ + a_feed.feed.title.encode('utf-8'), \ + a_feed.feed.link.encode('utf-8'), \ + feed_link, \ + feed_image)) + except sqlite3.IntegrityError: + pass for article in a_feed['entries']: try: description = article.description.encode('utf-8') @@ -110,14 +127,13 @@ class FeedGetter(object): article_id = sha256_hash.hexdigest() try: - self.c.execute('insert into rss_feed values (?,?,?,?,?,?,?)', (\ + self.c.execute('insert into articles values (?,?,?,?,?,?)', (\ datetime(*article.updated_parsed[:6]), \ article.title.encode('utf-8'), \ article.link.encode('utf-8'), \ description, \ - a_feed.feed.title.encode('utf-8'), \ - a_feed.feed.link.encode('utf-8'), \ - "0")) + "0", \ + feed_link)) except sqlite3.IntegrityError: pass diff --git a/pyAggr3g470r.py b/pyAggr3g470r.py index 7c45cc9c..53145ee7 100644 --- a/pyAggr3g470r.py +++ b/pyAggr3g470r.py @@ -83,10 +83,12 @@ class Root: html += """</div>\n<div class="left inner">\n""" for rss_feed_id in self.dic.keys(): - html += '<h2><a name="' + rss_feed_id + '">' + \ - '<a href="' + self.dic[rss_feed_id][0][6].encode('utf-8') + \ - '" rel="noreferrer" target="_blank">' + \ - self.dic[rss_feed_id][0][5].encode('utf-8') + "</a></a></h2>\n" + html += """<h2><a name="%s"><a href="%s" rel="noreferrer" + target="_blank">%s"</a></a> + <img src="%s" width="20" height="20" /></h2>\n""" % \ + (rss_feed_id, self.dic[rss_feed_id][0][6].encode('utf-8'), \ + self.dic[rss_feed_id][0][5].encode('utf-8'), \ + self.dic_info[rss_feed_id][2].encode('utf-8')) # The main page display only 10 articles by feeds. 
for article in self.dic[rss_feed_id][:10]: @@ -154,19 +156,20 @@ class Root: type="submit" value="Delete all articles"></form>\n""" html += "<hr />\n" - html += "<h1>Statistics</h1>\n" - top_words = utils.top_words(self.dic, 10) - - html += "<table border=0>\n<tr><td>" - html += "<ol>\n" - for word, frequency in top_words: - html += """\t<li><a href="/q/?querystring=%s">%s</a>: %s</li>\n""" % \ - (word, word, frequency) - html += "</ol>\n</td><td>" - utils.create_histogram(top_words) - html += """<img src="/var/histogram.png" /></td></tr></table>""" + if self.dic: + html += "<h1>Statistics</h1>\n" + top_words = utils.top_words(self.dic, 10) + + html += "<table border=0>\n<tr><td>" + html += "<ol>\n" + for word, frequency in top_words: + html += """\t<li><a href="/q/?querystring=%s">%s</a>: %s</li>\n""" % \ + (word, word, frequency) + html += "</ol>\n</td><td>" + utils.create_histogram(top_words) + html += """<img src="/var/histogram.png" /></td></tr></table>""" + html += "<hr />\n" - html += "<hr />\n" html += htmlfooter return html @@ -384,13 +387,13 @@ class Root: c = conn.cursor() # Mark all articles as read. if param == "All": - c.execute("UPDATE rss_feed SET article_readed=1") + c.execute("UPDATE articles SET article_readed=1") # Mark all articles from a feed as read. elif param == "Feed" or param == "Feed_FromMainPage": - c.execute("UPDATE rss_feed SET article_readed=1 WHERE feed_site_link='" + self.dic[identifiant][0][6] + "'") + c.execute("UPDATE articles SET article_readed=1 WHERE feed_site_link='" + self.dic[identifiant][0][6] + "'") # Mark an article as read. elif param == "Article": - c.execute("UPDATE rss_feed SET article_readed=1 WHERE article_link='" + identifiant + "'") + c.execute("UPDATE articles SET article_readed=1 WHERE article_link='" + identifiant + "'") conn.commit() c.close() except Exception, e: @@ -111,12 +111,13 @@ def load_feed(): """ Load feeds in a dictionary. 
""" + list_of_feeds = None list_of_articles = None try: conn = sqlite3.connect("./var/feed.db", isolation_level = None) c = conn.cursor() - list_of_articles = c.execute("SELECT * FROM rss_feed").fetchall() - c.close() + list_of_feeds = c.execute("SELECT * FROM feeds").fetchall() + #c.close() except: pass @@ -124,33 +125,45 @@ def load_feed(): # dic[feed_id] = (article_id, article_date, article_title, # article_link, article_description, feed_title, # feed_link, article_readed) - # dic_info[feed_id] = (nb_article, nb_article_unreaded) + # dic_info[feed_id] = (nb_article, nb_article_unreaded, feed_image) dic, dic_info = {}, {} - if list_of_articles is not None: - for article in list_of_articles: - sha256_hash = hashlib.sha256() - sha256_hash.update(article[5].encode('utf-8')) - feed_id = sha256_hash.hexdigest() - sha256_hash.update(article[2].encode('utf-8')) - article_id = sha256_hash.hexdigest() - - article_list = [article_id, article[0], article[1], \ - article[2], article[3], article[4], article[5], article[6]] - - if feed_id not in dic: - dic[feed_id] = [article_list] - else: - dic[feed_id].append(article_list) - - # sort articles by date for each feeds - for feeds in dic.keys(): - dic[feeds].sort(lambda x,y: compare(y[1], x[1])) - - for rss_feed_id in dic.keys(): - dic_info[rss_feed_id] = (len(dic[rss_feed_id]), \ - len([article for article in dic[rss_feed_id] \ - if article[7]=="0"]) \ - ) + if list_of_feeds is not None: + for feed in list_of_feeds: + feed_title = feed[0] + feed_site_link = feed[1] + feed_link = feed[2] + feed_image = feed[3] + + list_of_articles = c.execute(\ + "SELECT * FROM articles WHERE feed_link='" + \ + feed_link + "'").fetchall() + + if list_of_articles is not None: + for article in list_of_articles: + sha256_hash = hashlib.sha256() + sha256_hash.update(article[5].encode('utf-8')) + feed_id = sha256_hash.hexdigest() + sha256_hash.update(article[2].encode('utf-8')) + article_id = sha256_hash.hexdigest() + + article_list = [article_id, 
article[0], article[1], \ + article[2], article[3], feed_title, feed_link, article[4]] + + if feed_id not in dic: + dic[feed_id] = [article_list] + else: + dic[feed_id].append(article_list) + + # sort articles by date for each feeds + for feeds in dic.keys(): + dic[feeds].sort(lambda x,y: compare(y[1], x[1])) + + dic_info[feed_id] = (len(dic[feed_id]), \ + len([article for article in dic[feed_id] \ + if article[7]=="0"]), \ + feed_image + ) + c.close() return (dic, dic_info) return (dic, dic_info)
\ No newline at end of file |