From 8ad800252042839a5d7d0ea3cfaea0a56987efcc Mon Sep 17 00:00:00 2001 From: cedricbonhomme Date: Fri, 20 Apr 2012 14:30:16 +0200 Subject: SHA1 is replaced by urlsafe_b64encode for id of articles. --- source/feedgetter.py | 15 +++++---------- source/sqlite2mongo.py | 9 ++------- source/utils.py | 11 +++++++++++ 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'source') diff --git a/source/feedgetter.py b/source/feedgetter.py index e3469132..351d099d 100755 --- a/source/feedgetter.py +++ b/source/feedgetter.py @@ -99,10 +99,8 @@ class FeedGetter(object): except: feed_image = "/img/feed-icon-28x28.png" - sha1_hash = hashlib.sha1() - sha1_hash.update(feed_link.encode('utf-8')) - feed_id = sha1_hash.hexdigest() - + feed_id = utils.uri_b64encode(feed_link.encode('utf-8')) + collection_dic = {"feed_id": feed_id, \ "type": 0, \ "feed_image": feed_image, \ @@ -111,7 +109,7 @@ class FeedGetter(object): "site_link": a_feed.feed.link.encode('utf-8'), \ "mail": False \ } - + self.articles.add_collection(collection_dic) articles = [] @@ -134,10 +132,7 @@ class FeedGetter(object): except: post_date = datetime(*article.published_parsed[:6]) - - sha1_hash = hashlib.sha1() - sha1_hash.update(article.link.encode('utf-8')) - article_id = sha1_hash.hexdigest() + article_id = utils.uri_b64encode(article.link.encode('utf-8')) article = {"article_id": article_id, \ "type":1, \ @@ -148,7 +143,7 @@ class FeedGetter(object): "article_readed": False, \ "article_like": False \ } - + articles.append(article) self.articles.add_articles(articles, feed_id) diff --git a/source/sqlite2mongo.py b/source/sqlite2mongo.py index 633fb8f9..6a68d00b 100644 --- a/source/sqlite2mongo.py +++ b/source/sqlite2mongo.py @@ -1,7 +1,6 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -import hashlib import sqlite3 import mongodb @@ -36,9 +35,7 @@ def sqlite2mongo(): feed[2] + "'").fetchall() except: continue - sha1_hash = hashlib.sha1() - sha1_hash.update(feed[2].encode('utf-8')) - feed_id = sha1_hash.hexdigest() + feed_id = utils.uri_b64encode(feed[2].encode('utf-8')) new_collection = {"feed_id" : feed_id.encode('utf-8'), \ "type": 0, \ @@ -54,9 +51,7 @@ def sqlite2mongo(): # Walk through the list of articles for the current feed. articles = [] for article in list_of_articles: - sha1_hash = hashlib.sha1() - sha1_hash.update(article[2].encode('utf-8')) - article_id = sha1_hash.hexdigest() + article_id = utils.uri_b64encode(article[2].encode('utf-8')) article = {"article_id": article_id.encode('utf-8'), \ "type":1, \ diff --git a/source/utils.py b/source/utils.py index c23b8794..78c909f5 100755 --- a/source/utils.py +++ b/source/utils.py @@ -94,6 +94,17 @@ url_finders = [ \ re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+"), \ ] + +from base64 import urlsafe_b64encode, urlsafe_b64decode + + + +def uri_b64encode(s): + return urlsafe_b64encode(s).strip('=') + +def uri_b64decode(s): + return urlsafe_b64decode(s + '=' * (4 - len(s) % 4)) + def detect_url_errors(list_of_urls): """ Detect URL errors. -- cgit