Diffstat (limited to 'utils.py')
-rwxr-xr-x | utils.py | 164 |
1 file changed, 17 insertions, 147 deletions
@@ -43,8 +43,6 @@ import calendar
 import unicodedata
 import htmlentitydefs
 
-import articles
-
 import smtplib
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
@@ -72,15 +70,22 @@ try:
 except:
     config.read("./cfg/pyAggr3g470r.cfg-sample")
 path = os.path.abspath(".")
-sqlite_base = os.path.abspath(config.get('global', 'sqlitebase'))
+
+MONGODB_ADDRESS = config.get('MongoDB', 'address')
+MONGODB_PORT = int(config.get('MongoDB', 'port'))
+MONGODB_USER = config.get('MongoDB', 'user')
+MONGODB_PASSWORD = config.get('MongoDB', 'password')
+
 MAX_NB_ARTICLES = int(config.get('global', 'max_nb_articles'))
-DIASPORA_POD = config.get('misc', 'diaspora_pod')
+
 mail_from = config.get('mail','mail_from')
 mail_to = config.get('mail','mail_to')
 smtp_server = config.get('mail','smtp')
 username = config.get('mail','username')
 password = config.get('mail','password')
+DIASPORA_POD = config.get('misc', 'diaspora_pod')
+
 # regular expression to chech URL
 url_finders = [ \
     re.compile("([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}|(((news|telnet|nttp|file|http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\\.)[-A-Za-z0-9\\.]+)(:[0-9]*)?/[-A-Za-z0-9_\\$\\.\\+\\!\\*\\(\\),;:@&=\\?/~\\#\\%]*[^]'\\.}>\\),\\\"]"), \
@@ -165,16 +170,15 @@ def normalize_filename(name):
     file_name = strip_accents(file_name, "utf-8")
     return os.path.normpath(file_name)
 
-def top_words(feeds, n=10, size=5):
+def top_words(articles, n=10, size=5):
     """
     Return the n most frequent words in a list.
     """
     words = Counter()
     wordre = re.compile(r'\b\w{%s,}\b' % size, re.I)
-    for feed in feeds.values():
-        for article in feed.articles.values():
-            for word in wordre.findall(clear_string(article.article_description)):
-                words[word.lower()] += 1
+    for article in articles:
+        for word in wordre.findall(clear_string(article["article_content"])):
+            words[word.lower()] += 1
     return words.most_common(n)
 
 def tag_cloud(tags, query="word_count"):
@@ -184,15 +188,15 @@ def tag_cloud(tags, query="word_count"):
     tags.sort(key=operator.itemgetter(0))
     if query == "word_count":
         # tags cloud from the management page
-        return ' '.join([('<font size=%d><a href="/q/?querystring=%s" title="Count: %s">%s</a></font>\n' % \
+        return ' '.join([('<font size=%d><a href="/search/?query=%s" title="Count: %s">%s</a></font>\n' % \
                     (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word, count, word)) \
                     for (word, count) in tags])
     if query == "year":
         # tags cloud for the history
-        return ' '.join([('<font size=%d><a href="/history/?querystring=%s:%s" title="Count: %s">%s</a></font>\n' % \
+        return ' '.join([('<font size=%d><a href="/history/?query=%s:%s" title="Count: %s">%s</a></font>\n' % \
                     (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), query, word, count, word)) \
                     for (word, count) in tags])
-    return ' '.join([('<font size=%d><a href="/history/?querystring=%s:%s" title="Count: %s">%s</a></font>\n' % \
+    return ' '.join([('<font size=%d><a href="/history/?query=%s:%s" title="Count: %s">%s</a></font>\n' % \
                 (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), query, word, count, calendar.month_name[int(word)])) \
                 for (word, count) in tags])
@@ -327,16 +331,6 @@ def remove_feed(feed_url):
             feeds.append(line.replace("\n", ""))
     with open("./var/feed.lst", "w") as f:
         f.write("\n".join(feeds) + "\n")
-    # Remove articles from this feed from the SQLite base.
-    try:
-        conn = sqlite3.connect(sqlite_base, isolation_level = None)
-        c = conn.cursor()
-        c.execute("DELETE FROM feeds WHERE feed_link='" + feed_url +"'")
-        c.execute("DELETE FROM articles WHERE feed_link='" + feed_url +"'")
-        conn.commit()
-        c.close()
-    except:
-        pass
 
 def search_feed(url):
     """
@@ -354,128 +348,4 @@ def search_feed(url):
         if url not in feed_link['href']:
             return urlparse.urljoin(url, feed_link['href'])
         return feed_link['href']
-    return None
-
-def create_base():
-    """
-    Create the base if not exists.
-    """
-    sqlite3.register_adapter(str, lambda s : s.decode('utf-8'))
-    conn = sqlite3.connect(sqlite_base, isolation_level = None)
-    c = conn.cursor()
-    # table of feeds
-    c.execute('''create table if not exists feeds
-                (feed_title text, feed_site_link text, \
-                feed_link text PRIMARY KEY, feed_image_link text,
-                mail text)''')
-    # table of articles
-    c.execute('''create table if not exists articles
-                (article_date text, article_title text, \
-                article_link text PRIMARY KEY, article_description text, \
-                article_readed text, feed_link text, like text)''')
-    conn.commit()
-    c.close()
-
-def drop_base():
-    """
-    Delete all articles from the database.
-    """
-    sqlite3.register_adapter(str, lambda s : s.decode('utf-8'))
-    conn = sqlite3.connect(sqlite_base, isolation_level = None)
-    c = conn.cursor()
-    c.execute('''DROP TABLE IF EXISTS feeds''')
-    c.execute('''DROP TABLE IF EXISTS articles''')
-    conn.commit()
-    c.close()
-
-def load_feed():
-    """
-    Load feeds and articles in a dictionary.
-    """
-    LOCKER.acquire()
-    list_of_feeds = []
-    list_of_articles = []
-    try:
-        conn = sqlite3.connect(sqlite_base, isolation_level = None)
-        c = conn.cursor()
-        list_of_feeds = c.execute("SELECT * FROM feeds").fetchall()
-    except:
-        pass
-
-    nb_articles = 0
-    nb_unread_articles = 0
-    nb_mail_notifications = 0
-    nb_favorites = 0
-
-    # Contains the list of Feed object.
-    feeds = OrderedDict()
-
-    if list_of_feeds != []:
-        # Case-insensitive sorting
-        tupleList = [(x[0].lower(), x) for x in list_of_feeds]
-        tupleList.sort(key=operator.itemgetter(0))
-
-        # Walk through the list of feeds
-        for feed in [x[1] for x in tupleList]:
-            try:
-                list_of_articles = c.execute(\
-                        "SELECT * FROM articles WHERE feed_link='" + \
-                        feed[2] + "'").fetchall()
-            except:
-                LOCKER.release()
-                continue
-            sha1_hash = hashlib.sha1()
-            sha1_hash.update(feed[2].encode('utf-8'))
-            feed_id = sha1_hash.hexdigest()
-
-            # Current Feed object
-            feed_object = articles.Feed()
-            feed_object.feed_id = feed_id.encode('utf-8')
-            feed_object.feed_title = feed[0].encode('utf-8')
-            feed_object.feed_image = feed[3].encode('utf-8')
-            feed_object.feed_link = feed[2].encode('utf-8')
-            feed_object.feed_site_link = feed[1].encode('utf-8')
-            feed_object.mail = feed[4]
-
-            if list_of_articles != []:
-                list_of_articles.sort(lambda x,y: compare(y[0], x[0]))
-                if MAX_NB_ARTICLES != -1:
-                    list_of_articles = list_of_articles[:MAX_NB_ARTICLES]
-                # Walk through the list of articles for the current feed.
-                for article in list_of_articles:
-                    sha1_hash = hashlib.sha1()
-                    sha1_hash.update(article[2].encode('utf-8'))
-                    article_id = sha1_hash.hexdigest()
-
-                    # Current Article object
-                    article_object = articles.Article()
-                    article_object.article_id = article_id.encode('utf-8')
-                    article_object.article_date = article[0].encode('utf-8')
-                    article_object.article_title = unescape(article[1]).encode('utf-8')
-                    article_object.article_link = article[2].encode('utf-8')
-                    article_object.article_description = unescape(article[3]).encode('utf-8')
-                    article_object.article_readed = article[4]
-                    article_object.like = article[6]
-
-                    feed_object.articles[article_id] = article_object
-
-                    # update the number of favorites articles
-                    nb_favorites = nb_favorites + int(article[6])
-
-
-            # informations about a feed
-            feed_object.nb_articles = len(feed_object.articles)
-            feed_object.nb_unread_articles = len([article for article in feed_object.articles.values() \
-                                            if article.article_readed=="0"])
-
-            feeds[feed_id] = feed_object
-
-            nb_articles += feed_object.nb_articles
-            nb_unread_articles += feed_object.nb_unread_articles
-            nb_mail_notifications += int(feed_object.mail)
-
-        c.close()
-        LOCKER.release()
-        return (feeds, nb_articles, nb_unread_articles, nb_favorites, nb_mail_notifications)
-    LOCKER.release()
-    return (feeds, nb_articles, nb_unread_articles, nb_favorites, nb_mail_notifications)
+    return None
\ No newline at end of file
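
This commit only loads the new MongoDB settings in utils.py; the code that consumes them is not part of this diff. As a rough sketch of how they might be used, assuming the pymongo driver and a database named "pyaggr3g470r" (both assumptions, not shown here):

    # Illustrative sketch only -- not part of this commit.
    # Assumes pymongo and a database named "pyaggr3g470r".
    import pymongo

    def get_database():
        # Connect with the values read from pyAggr3g470r.cfg above.
        connection = pymongo.MongoClient(MONGODB_ADDRESS, MONGODB_PORT)
        database = connection.pyaggr3g470r
        if MONGODB_USER and MONGODB_PASSWORD:
            # Credentials may be left empty in the sample configuration.
            database.authenticate(MONGODB_USER, MONGODB_PASSWORD)
        return database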