aboutsummaryrefslogtreecommitdiff
path: root/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils.py')
-rwxr-xr-xutils.py164
1 files changed, 17 insertions, 147 deletions
diff --git a/utils.py b/utils.py
index 0f5453c3..c23b8794 100755
--- a/utils.py
+++ b/utils.py
@@ -43,8 +43,6 @@ import calendar
import unicodedata
import htmlentitydefs
-import articles
-
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
@@ -72,15 +70,22 @@ try:
except:
config.read("./cfg/pyAggr3g470r.cfg-sample")
path = os.path.abspath(".")
-sqlite_base = os.path.abspath(config.get('global', 'sqlitebase'))
+
+MONGODB_ADDRESS = config.get('MongoDB', 'address')
+MONGODB_PORT = int(config.get('MongoDB', 'port'))
+MONGODB_USER = config.get('MongoDB', 'user')
+MONGODB_PASSWORD = config.get('MongoDB', 'password')
+
MAX_NB_ARTICLES = int(config.get('global', 'max_nb_articles'))
-DIASPORA_POD = config.get('misc', 'diaspora_pod')
+
mail_from = config.get('mail','mail_from')
mail_to = config.get('mail','mail_to')
smtp_server = config.get('mail','smtp')
username = config.get('mail','username')
password = config.get('mail','password')
+DIASPORA_POD = config.get('misc', 'diaspora_pod')
+
# regular expression to check URL
url_finders = [ \
re.compile("([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}|(((news|telnet|nttp|file|http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\\.)[-A-Za-z0-9\\.]+)(:[0-9]*)?/[-A-Za-z0-9_\\$\\.\\+\\!\\*\\(\\),;:@&=\\?/~\\#\\%]*[^]'\\.}>\\),\\\"]"), \
@@ -165,16 +170,15 @@ def normalize_filename(name):
file_name = strip_accents(file_name, "utf-8")
return os.path.normpath(file_name)
-def top_words(feeds, n=10, size=5):
+def top_words(articles, n=10, size=5):
"""
Return the n most frequent words in a list.
"""
words = Counter()
wordre = re.compile(r'\b\w{%s,}\b' % size, re.I)
- for feed in feeds.values():
- for article in feed.articles.values():
- for word in wordre.findall(clear_string(article.article_description)):
- words[word.lower()] += 1
+ for article in articles:
+ for word in wordre.findall(clear_string(article["article_content"])):
+ words[word.lower()] += 1
return words.most_common(n)
def tag_cloud(tags, query="word_count"):
@@ -184,15 +188,15 @@ def tag_cloud(tags, query="word_count"):
tags.sort(key=operator.itemgetter(0))
if query == "word_count":
# tags cloud from the management page
- return ' '.join([('<font size=%d><a href="/q/?querystring=%s" title="Count: %s">%s</a></font>\n' % \
+ return ' '.join([('<font size=%d><a href="/search/?query=%s" title="Count: %s">%s</a></font>\n' % \
(min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word, count, word)) \
for (word, count) in tags])
if query == "year":
# tags cloud for the history
- return ' '.join([('<font size=%d><a href="/history/?querystring=%s:%s" title="Count: %s">%s</a></font>\n' % \
+ return ' '.join([('<font size=%d><a href="/history/?query=%s:%s" title="Count: %s">%s</a></font>\n' % \
(min(1 + count * 7 / max([tag[1] for tag in tags]), 7), query, word, count, word)) \
for (word, count) in tags])
- return ' '.join([('<font size=%d><a href="/history/?querystring=%s:%s" title="Count: %s">%s</a></font>\n' % \
+ return ' '.join([('<font size=%d><a href="/history/?query=%s:%s" title="Count: %s">%s</a></font>\n' % \
(min(1 + count * 7 / max([tag[1] for tag in tags]), 7), query, word, count, calendar.month_name[int(word)])) \
for (word, count) in tags])
@@ -327,16 +331,6 @@ def remove_feed(feed_url):
feeds.append(line.replace("\n", ""))
with open("./var/feed.lst", "w") as f:
f.write("\n".join(feeds) + "\n")
- # Remove articles from this feed from the SQLite base.
- try:
- conn = sqlite3.connect(sqlite_base, isolation_level = None)
- c = conn.cursor()
- c.execute("DELETE FROM feeds WHERE feed_link='" + feed_url +"'")
- c.execute("DELETE FROM articles WHERE feed_link='" + feed_url +"'")
- conn.commit()
- c.close()
- except:
- pass
def search_feed(url):
"""
@@ -354,128 +348,4 @@ def search_feed(url):
if url not in feed_link['href']:
return urlparse.urljoin(url, feed_link['href'])
return feed_link['href']
- return None
-
-def create_base():
- """
- Create the base if not exists.
- """
- sqlite3.register_adapter(str, lambda s : s.decode('utf-8'))
- conn = sqlite3.connect(sqlite_base, isolation_level = None)
- c = conn.cursor()
- # table of feeds
- c.execute('''create table if not exists feeds
- (feed_title text, feed_site_link text, \
- feed_link text PRIMARY KEY, feed_image_link text,
- mail text)''')
- # table of articles
- c.execute('''create table if not exists articles
- (article_date text, article_title text, \
- article_link text PRIMARY KEY, article_description text, \
- article_readed text, feed_link text, like text)''')
- conn.commit()
- c.close()
-
-def drop_base():
- """
- Delete all articles from the database.
- """
- sqlite3.register_adapter(str, lambda s : s.decode('utf-8'))
- conn = sqlite3.connect(sqlite_base, isolation_level = None)
- c = conn.cursor()
- c.execute('''DROP TABLE IF EXISTS feeds''')
- c.execute('''DROP TABLE IF EXISTS articles''')
- conn.commit()
- c.close()
-
-def load_feed():
- """
- Load feeds and articles in a dictionary.
- """
- LOCKER.acquire()
- list_of_feeds = []
- list_of_articles = []
- try:
- conn = sqlite3.connect(sqlite_base, isolation_level = None)
- c = conn.cursor()
- list_of_feeds = c.execute("SELECT * FROM feeds").fetchall()
- except:
- pass
-
- nb_articles = 0
- nb_unread_articles = 0
- nb_mail_notifications = 0
- nb_favorites = 0
-
- # Contains the list of Feed object.
- feeds = OrderedDict()
-
- if list_of_feeds != []:
- # Case-insensitive sorting
- tupleList = [(x[0].lower(), x) for x in list_of_feeds]
- tupleList.sort(key=operator.itemgetter(0))
-
- # Walk through the list of feeds
- for feed in [x[1] for x in tupleList]:
- try:
- list_of_articles = c.execute(\
- "SELECT * FROM articles WHERE feed_link='" + \
- feed[2] + "'").fetchall()
- except:
- LOCKER.release()
- continue
- sha1_hash = hashlib.sha1()
- sha1_hash.update(feed[2].encode('utf-8'))
- feed_id = sha1_hash.hexdigest()
-
- # Current Feed object
- feed_object = articles.Feed()
- feed_object.feed_id = feed_id.encode('utf-8')
- feed_object.feed_title = feed[0].encode('utf-8')
- feed_object.feed_image = feed[3].encode('utf-8')
- feed_object.feed_link = feed[2].encode('utf-8')
- feed_object.feed_site_link = feed[1].encode('utf-8')
- feed_object.mail = feed[4]
-
- if list_of_articles != []:
- list_of_articles.sort(lambda x,y: compare(y[0], x[0]))
- if MAX_NB_ARTICLES != -1:
- list_of_articles = list_of_articles[:MAX_NB_ARTICLES]
- # Walk through the list of articles for the current feed.
- for article in list_of_articles:
- sha1_hash = hashlib.sha1()
- sha1_hash.update(article[2].encode('utf-8'))
- article_id = sha1_hash.hexdigest()
-
- # Current Article object
- article_object = articles.Article()
- article_object.article_id = article_id.encode('utf-8')
- article_object.article_date = article[0].encode('utf-8')
- article_object.article_title = unescape(article[1]).encode('utf-8')
- article_object.article_link = article[2].encode('utf-8')
- article_object.article_description = unescape(article[3]).encode('utf-8')
- article_object.article_readed = article[4]
- article_object.like = article[6]
-
- feed_object.articles[article_id] = article_object
-
- # update the number of favorites articles
- nb_favorites = nb_favorites + int(article[6])
-
-
- # informations about a feed
- feed_object.nb_articles = len(feed_object.articles)
- feed_object.nb_unread_articles = len([article for article in feed_object.articles.values() \
- if article.article_readed=="0"])
-
- feeds[feed_id] = feed_object
-
- nb_articles += feed_object.nb_articles
- nb_unread_articles += feed_object.nb_unread_articles
- nb_mail_notifications += int(feed_object.mail)
-
- c.close()
- LOCKER.release()
- return (feeds, nb_articles, nb_unread_articles, nb_favorites, nb_mail_notifications)
- LOCKER.release()
- return (feeds, nb_articles, nb_unread_articles, nb_favorites, nb_mail_notifications)
+ return None \ No newline at end of file
bgstack15