aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mongodb.py22
-rw-r--r--sqlite2mongo.py82
2 files changed, 90 insertions, 14 deletions
diff --git a/mongodb.py b/mongodb.py
index 0eb4e002..a37b80c0 100644
--- a/mongodb.py
+++ b/mongodb.py
@@ -27,9 +27,8 @@ class Articles(object):
"""
Creates a new collection for a new feed.
"""
- #pymongo.collection.Collection(self.db, new_collection["feed_id"])
collection = self.db[new_collection["feed_id"]]
- collection.create_index([("article_link", pymongo.ASCENDING)], {"unique":True, "sparse":True})
+ #collection.create_index([("feed_link", pymongo.ASCENDING)], {"unique":True, "sparse":True})
collection.insert(new_collection)
def add_articles(self, articles, feed_id):
@@ -37,6 +36,10 @@ class Articles(object):
Add article(s) in a collection.
"""
collection = self.db[str(feed_id)]
+
+ collection.create_index([("article_link", pymongo.ASCENDING), ("article_date", pymongo.DESCENDING)], \
+ {"unique":False, "sparse":False})
+
for article in articles:
cursor = collection.find({"article_id":article["article_id"]})
if cursor.count() == 0:
@@ -102,7 +105,8 @@ class Articles(object):
cursor = collection.find({"type":1})
else:
cursor = collection.find({"type":1, condition[0]:condition[1]})
- return cursor.sort([("article_date", pymongo.DESCENDING)])
+ #return cursor.sort([("article_date", pymongo.DESCENDING)])
+ return cursor
def print_articles_from_collection(self, collection_id):
"""
@@ -245,16 +249,6 @@ if __name__ == "__main__":
#print articles.get_all_articles()
-
-
- for feed in articles.get_all_collections():
- for article in articles.get_articles_from_collection(feed["feed_id"]):
- try:
- #print article["article_title"], article["article_date"]
- pass
- except:
- pass
-
# Drop the database
- #articles.drop_database() \ No newline at end of file
+ articles.drop_database() \ No newline at end of file
diff --git a/sqlite2mongo.py b/sqlite2mongo.py
new file mode 100644
index 00000000..b129ebce
--- /dev/null
+++ b/sqlite2mongo.py
@@ -0,0 +1,82 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import hashlib
+
+import sqlite3
+import mongodb
+
+SQLITE_BASE = "./var/feed.db"
+
+
+def load_feed():
+ """
+ Load feeds and articles in a dictionary.
+ """
+ mongo = mongodb.Articles()
+ list_of_feeds = []
+ list_of_articles = []
+
+ try:
+ conn = sqlite3.connect(SQLITE_BASE, isolation_level = None)
+ c = conn.cursor()
+ list_of_feeds = c.execute("SELECT * FROM feeds").fetchall()
+ except:
+ pass
+
+
+ if list_of_feeds != []:
+ # Walk through the list of feeds
+ for feed in list_of_feeds:
+ try:
+ list_of_articles = c.execute(\
+ "SELECT * FROM articles WHERE feed_link='" + \
+ feed[2] + "'").fetchall()
+ except:
+ continue
+ sha1_hash = hashlib.sha1()
+ sha1_hash.update(feed[2].encode('utf-8'))
+ feed_id = sha1_hash.hexdigest()
+
+
+ new_collection = {"feed_id" : feed_id.encode('utf-8'), \
+ "type": 0, \
+ "feed_image" : feed[3].encode('utf-8'), \
+ "feed_title" : feed[0].encode('utf-8'), \
+ "feed_link" : feed[2].encode('utf-8'), \
+ "site_link" : feed[1].encode('utf-8'), \
+ "mail" : feed[4]=="1"}
+
+
+ mongo.add_collection(new_collection)
+
+
+ if list_of_articles != []:
+ # Walk through the list of articles for the current feed.
+ articles = []
+ for article in list_of_articles:
+ sha1_hash = hashlib.sha1()
+ sha1_hash.update(article[2].encode('utf-8'))
+ article_id = sha1_hash.hexdigest()
+
+
+ article = {"article_id": article_id.encode('utf-8'), \
+ "type":1, \
+ "article_date": article[0].encode('utf-8'), \
+ "article_link": article[2].encode('utf-8'), \
+ "article_title": article[1].encode('utf-8'), \
+ "article_content": article[3].encode('utf-8'), \
+ "article_readed": article[4]=="1", \
+ "article_like": article[6]=="1" \
+ }
+
+ articles.append(article)
+
+
+ mongo.add_articles(articles, feed_id)
+
+ c.close()
+
+
+if __name__ == "__main__":
+ load_feed() \ No newline at end of file
bgstack15