about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x source/feedgetter.py 15
-rw-r--r-- source/sqlite2mongo.py 9
-rwxr-xr-x source/utils.py 11
3 files changed, 18 insertions, 17 deletions
diff --git a/source/feedgetter.py b/source/feedgetter.py
index e3469132..351d099d 100755
--- a/source/feedgetter.py
+++ b/source/feedgetter.py
@@ -99,10 +99,8 @@ class FeedGetter(object):
except:
feed_image = "/img/feed-icon-28x28.png"
- sha1_hash = hashlib.sha1()
- sha1_hash.update(feed_link.encode('utf-8'))
- feed_id = sha1_hash.hexdigest()
-
+ feed_id = utils.uri_b64encode(feed_link.encode('utf-8'))
+
collection_dic = {"feed_id": feed_id, \
"type": 0, \
"feed_image": feed_image, \
@@ -111,7 +109,7 @@ class FeedGetter(object):
"site_link": a_feed.feed.link.encode('utf-8'), \
"mail": False \
}
-
+
self.articles.add_collection(collection_dic)
articles = []
@@ -134,10 +132,7 @@ class FeedGetter(object):
except:
post_date = datetime(*article.published_parsed[:6])
-
- sha1_hash = hashlib.sha1()
- sha1_hash.update(article.link.encode('utf-8'))
- article_id = sha1_hash.hexdigest()
+ article_id = utils.uri_b64encode(article.link.encode('utf-8'))
article = {"article_id": article_id, \
"type":1, \
@@ -148,7 +143,7 @@ class FeedGetter(object):
"article_readed": False, \
"article_like": False \
}
-
+
articles.append(article)
self.articles.add_articles(articles, feed_id)
diff --git a/source/sqlite2mongo.py b/source/sqlite2mongo.py
index 633fb8f9..6a68d00b 100644
--- a/source/sqlite2mongo.py
+++ b/source/sqlite2mongo.py
@@ -1,7 +1,6 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
-import hashlib
import sqlite3
import mongodb
@@ -36,9 +35,7 @@ def sqlite2mongo():
feed[2] + "'").fetchall()
except:
continue
- sha1_hash = hashlib.sha1()
- sha1_hash.update(feed[2].encode('utf-8'))
- feed_id = sha1_hash.hexdigest()
+ feed_id = utils.uri_b64encode(feed[2].encode('utf-8'))
new_collection = {"feed_id" : feed_id.encode('utf-8'), \
"type": 0, \
@@ -54,9 +51,7 @@ def sqlite2mongo():
# Walk through the list of articles for the current feed.
articles = []
for article in list_of_articles:
- sha1_hash = hashlib.sha1()
- sha1_hash.update(article[2].encode('utf-8'))
- article_id = sha1_hash.hexdigest()
+ article_id = utils.uri_b64encode(article[2].encode('utf-8'))
article = {"article_id": article_id.encode('utf-8'), \
"type":1, \
diff --git a/source/utils.py b/source/utils.py
index c23b8794..78c909f5 100755
--- a/source/utils.py
+++ b/source/utils.py
@@ -94,6 +94,17 @@ url_finders = [ \
re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+"), \
]
+
+from base64 import urlsafe_b64encode, urlsafe_b64decode
+
+
+
+def uri_b64encode(s):
+ return urlsafe_b64encode(s).strip('=')
+
+def uri_b64decode(s):
+ return urlsafe_b64decode(s + '=' * (4 - len(s) % 4))
+
def detect_url_errors(list_of_urls):
"""
Detect URL errors.
bgstack15