aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcedricbonhomme <devnull@localhost>2012-05-01 14:10:25 +0200
committercedricbonhomme <devnull@localhost>2012-05-01 14:10:25 +0200
commitba14debbde6cde5d77f5d57e67d4bef341042857 (patch)
treeca50a7169cd0f86b472c6a36fd4ed1dd4e9276c5
parentExport to webzine nearly OK. (diff)
downloadnewspipe-ba14debbde6cde5d77f5d57e67d4bef341042857.tar.gz
newspipe-ba14debbde6cde5d77f5d57e67d4bef341042857.tar.bz2
newspipe-ba14debbde6cde5d77f5d57e67d4bef341042857.zip
urlsafe_b64encode is replaced by SHA1 for id of articles.
-rwxr-xr-xsource/feedgetter.py9
-rw-r--r--source/sqlite2mongo.py9
-rwxr-xr-xsource/utils.py14
3 files changed, 14 insertions, 18 deletions
diff --git a/source/feedgetter.py b/source/feedgetter.py
index aa25f2a3..59322e6a 100755
--- a/source/feedgetter.py
+++ b/source/feedgetter.py
@@ -26,6 +26,7 @@ __revision__ = "$Date: 2012/04/22 $"
__copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "GPLv3"
+import hashlib
import threading
import feedparser
from BeautifulSoup import BeautifulSoup
@@ -96,7 +97,9 @@ class FeedGetter(object):
except:
feed_image = "/img/feed-icon-28x28.png"
- feed_id = utils.uri_b64encode(feed_link.encode('utf-8'))
+ sha1_hash = hashlib.sha1()
+ sha1_hash.update(feed_link.encode('utf-8'))
+ feed_id = sha1_hash.hexdigest()
collection_dic = {"feed_id": feed_id, \
"type": 0, \
@@ -129,7 +132,9 @@ class FeedGetter(object):
except:
post_date = datetime(*article.published_parsed[:6])
- article_id = utils.uri_b64encode(article.link.encode('utf-8'))
+ sha1_hash = hashlib.sha1()
+ sha1_hash.update(article.link.encode('utf-8'))
+ article_id = sha1_hash.hexdigest()
article = {"article_id": article_id, \
"type":1, \
diff --git a/source/sqlite2mongo.py b/source/sqlite2mongo.py
index c4bb4e17..ecb0ec7f 100644
--- a/source/sqlite2mongo.py
+++ b/source/sqlite2mongo.py
@@ -1,6 +1,7 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
+import hashlib
import sqlite3
import mongodb
@@ -34,7 +35,9 @@ def sqlite2mongo():
feed[2] + "'").fetchall()
except:
continue
- feed_id = utils.uri_b64encode(feed[2].encode('utf-8'))
+ sha1_hash = hashlib.sha1()
+ sha1_hash.update(feed[2].encode('utf-8'))
+ feed_id = sha1_hash.hexdigest()
new_collection = {"feed_id" : feed_id.encode('utf-8'), \
"type": 0, \
@@ -50,7 +53,9 @@ def sqlite2mongo():
# Walk through the list of articles for the current feed.
articles = []
for article in list_of_articles:
- article_id = utils.uri_b64encode(article[2].encode('utf-8'))
+ sha1_hash = hashlib.sha1()
+ sha1_hash.update(article[2].encode('utf-8'))
+ article_id = sha1_hash.hexdigest()
article = {"article_id": article_id.encode('utf-8'), \
"type":1, \
diff --git a/source/utils.py b/source/utils.py
index d1d2c684..da68550b 100755
--- a/source/utils.py
+++ b/source/utils.py
@@ -64,20 +64,6 @@ url_finders = [ \
re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+"), \
]
-from base64 import urlsafe_b64encode, urlsafe_b64decode
-
-def uri_b64encode(s):
- """
- Encode an URI in base 64 and remove the final '='.
- """
- return urlsafe_b64encode(s).strip('=')
-
-def uri_b64decode(s):
- """
- Decode a base 64 encoded URI.
- """
- return urlsafe_b64decode(s + '=' * (4 - len(s) % 4))
-
def detect_url_errors(list_of_urls):
"""
Detect URL errors.
bgstack15