author    | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-04-27 02:09:26 +0200
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-04-27 02:09:26 +0200
commit    | 89cf405ab970c4e289b7b79485b27aed8edb1a41 (patch)
tree      | 7f6330fb7b9bab82a84aeb2a639377b9c4fe1d80 /pyaggr3g470r
parent    | This fixes #3. (diff)
download  | newspipe-89cf405ab970c4e289b7b79485b27aed8edb1a41.tar.gz
          | newspipe-89cf405ab970c4e289b7b79485b27aed8edb1a41.tar.bz2
          | newspipe-89cf405ab970c4e289b7b79485b27aed8edb1a41.zip
Cleaned code.
Diffstat (limited to 'pyaggr3g470r')
-rw-r--r-- | pyaggr3g470r/__init__.py   |  3
-rw-r--r-- | pyaggr3g470r/crawler.py    | 42
-rw-r--r-- | pyaggr3g470r/decorators.py |  7
-rw-r--r-- | pyaggr3g470r/export.py     |  3
-rw-r--r-- | pyaggr3g470r/search.py     | 41
-rwxr-xr-x | pyaggr3g470r/utils.py      | 24
-rw-r--r-- | pyaggr3g470r/views.py      |  9
7 files changed, 79 insertions, 50 deletions
diff --git a/pyaggr3g470r/__init__.py b/pyaggr3g470r/__init__.py
index e948a187..a8211217 100644
--- a/pyaggr3g470r/__init__.py
+++ b/pyaggr3g470r/__init__.py
@@ -19,6 +19,7 @@ db = SQLAlchemy(app)
 
 ALLOWED_EXTENSIONS = set(['xml', 'opml'])
 
+
 def allowed_file(filename):
     """
     Check if the uploaded WSW file is allowed.
@@ -34,7 +35,7 @@ if not conf.ON_HEROKU:
     app.config["MAIL_USERNAME"] = conf.MAIL_USERNAME
     app.config["MAIL_PASSWORD"] = conf.MAIL_PASSWORD
 
-    from flask.ext.mail import Message, Mail
+    from flask.ext.mail import Mail
     mail = Mail(app)
 
 # Gravatar
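The `__init__.py` hunk only adds a blank line before `allowed_file()`, so the function body stays outside the context lines. For reference, a sketch of the usual Flask upload-extension check such a helper follows; the body below is an assumption, not code from this commit:

```python
ALLOWED_EXTENSIONS = set(['xml', 'opml'])

def allowed_file(filename):
    # Accept only filenames whose extension is whitelisted above
    # (assumed implementation; the real body is not shown in the diff).
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
```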
Error: %s" % + (article.link, str(e))) continue # remove utm_* parameters nice_url = utils.clean_url(nice_url) @@ -181,7 +187,7 @@ class FeedGetter(object): try: description = BeautifulSoup(description, "html.parser").decode() article_title = BeautifulSoup(article.title, "html.parser").decode() - except Exception as E: + except Exception: pyaggr3g470r_log.error("Problem when sanitizing the content of the article %s (%s)" % (article_title, nice_url)) article_title = article.title @@ -215,7 +221,9 @@ class FeedGetter(object): for article in articles: - exist = Article.query.filter(Article.user_id == self.user.id, Article.feed_id == feed.id, Article.link == article.link).first() + exist = Article.query.filter(Article.user_id == self.user.id, + Article.feed_id == feed.id, + Article.link == article.link).first() if exist != None: pyaggr3g470r_log.error("Article %s (%s) already in the database." % (article.title, article.link)) continue @@ -242,9 +250,11 @@ class FeedGetter(object): pyaggr3g470r_log.info("Indexing new articles.") for feed, articles in elements: for element in articles: - article = Article.query.filter(Article.user_id == self.user.id, Article.link == element.link).first() + article = Article.query.filter(Article.user_id == self.user.id, + Article.link == element.link).first() try: - fastsearch.add_to_index(self.user.id, [article], article.source) + fastsearch.add_to_index(self.user.id, [article], + article.source) except: pyaggr3g470r_log.error("Problem during indexation.") return True
diff --git a/pyaggr3g470r/decorators.py b/pyaggr3g470r/decorators.py
index 565d25a6..a32e9709 100644
--- a/pyaggr3g470r/decorators.py
+++ b/pyaggr3g470r/decorators.py
@@ -8,16 +8,19 @@ from flask import g, redirect, url_for, flash
 
 from pyaggr3g470r.models import Feed
 
+
 def async(f):
     def wrapper(*args, **kwargs):
-        thr = Thread(target = f, args = args, kwargs = kwargs)
+        thr = Thread(target=f, args=args, kwargs=kwargs)
         thr.start()
     return wrapper
 
+
 def feed_access_required(func):
     """
     This decorator enables to check if a user has access to a feed.
-    The administrator of the platform is able to access to the feeds of a normal user.
+    The administrator of the platform is able to access to the feeds
+    of a normal user.
     """
     @wraps(func)
     def decorated(*args, **kwargs):
diff --git a/pyaggr3g470r/export.py b/pyaggr3g470r/export.py
index e7978e7c..243b6843 100644
--- a/pyaggr3g470r/export.py
+++ b/pyaggr3g470r/export.py
@@ -40,9 +40,9 @@ import tarfile
 from datetime import datetime
 
 import conf
-import utils
 import models
 
+
 def HTML_HEADER(title="pyAggr3g470r", css="./style.css"):
     return """<!DOCTYPE html>
 <html lang="en-US">
@@ -132,6 +132,7 @@ img {
     margin:1.00em 1.00em;
 }"""
 
+
 def export_html(user):
     """
     Export all articles of 'user' in Web pages.
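The `async` decorator touched above is a fire-and-forget helper: it starts the wrapped callable in a new thread and returns immediately. A usage sketch (`send_notification` is hypothetical; note that `async` became a reserved word in Python 3.7, so this spelling only works on the Python 2 interpreter this codebase targets):

```python
from threading import Thread

def async(f):
    # Start f in a background thread; the caller never waits for it.
    def wrapper(*args, **kwargs):
        thr = Thread(target=f, args=args, kwargs=kwargs)
        thr.start()
    return wrapper

@async
def send_notification(address, message):
    print("notifying %s: %s" % (address, message))

send_notification("user@example.com", "3 new articles")  # returns at once
```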
""" try: ix = open_dir(indexdir) - except (EmptyIndexError, OSError) as e: + except (EmptyIndexError, OSError): raise EmptyIndexError writer = ix.writer() - document = And([Term("user_id", user_id), Term("feed_id", feed_id), Term("article_id", article_id)]) + document = And([Term("user_id", user_id), Term("feed_id", feed_id), + Term("article_id", article_id)]) writer.delete_by_query(document) writer.commit() + def search(user_id, term): """ Search for `term` in the index. @@ -106,7 +110,7 @@ def search(user_id, term): result_dict = defaultdict(list) try: ix = open_dir(indexdir) - except (EmptyIndexError, OSError) as e: + except (EmptyIndexError, OSError): raise EmptyIndexError with ix.searcher() as searcher: query = QueryParser("content", ix.schema).parse(term) @@ -115,13 +119,14 @@ def search(user_id, term): result_dict[article["feed_id"]].append(article["article_id"]) return result_dict, len(results) + def nb_documents(): """ Return the number of undeleted documents. """ try: ix = open_dir(indexdir) - except (EmptyIndexError, OSError) as e: + except (EmptyIndexError, OSError): raise EmptyIndexError return ix.doc_count() diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py index ce210c20..88a3904a 100755 --- a/pyaggr3g470r/utils.py +++ b/pyaggr3g470r/utils.py @@ -34,12 +34,10 @@ __license__ = "AGPLv3" # - e-mail notifications. # -import os import re import glob import opml import operator -import calendar from urllib import urlencode from urlparse import urlparse, parse_qs, urlunparse from BeautifulSoup import BeautifulSoup @@ -52,7 +50,7 @@ from pyaggr3g470r import db from pyaggr3g470r.models import User, Feed # regular expression to check URL -url_finders = [ \ +url_finders = [ re.compile("([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}|(((news|telnet|nttp|file|http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\\.)[-A-Za-z0-9\\.]+)(:[0-9]*)?/[-A-Za-z0-9_\\$\\.\\+\\!\\*\\(\\),;:@&=\\?/~\\#\\%]*[^]'\\.}>\\),\\\"]"), \ re.compile("([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}|(((news|telnet|nttp|file|http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\\.)[-A-Za-z0-9\\.]+)(:[0-9]*)?"), \ re.compile("(~/|/|\\./)([-A-Za-z0-9_\\$\\.\\+\\!\\*\\(\\),;:@&=\\?/~\\#\\%]|\\\\)+"), \ @@ -62,6 +60,7 @@ url_finders = [ \ #import log #pyaggr3g470r_log = log.Log() + @contextmanager def opened_w_error(filename, mode="r"): try: @@ -74,6 +73,7 @@ def opened_w_error(filename, mode="r"): finally: f.close() + def import_opml(email, opml_file): """ Import new feeds from an OPML file. @@ -89,11 +89,11 @@ def import_opml(email, opml_file): Parse recursively through the categories and sub-categories. 
""" for subscription in subsubscription: - + if len(subscription) != 0: nb = read(subscription, nb) else: - + try: title = subscription.text @@ -118,7 +118,9 @@ def import_opml(email, opml_file): except: site_link = "" - new_feed = Feed(title=title, description=description, link=link, site_link=site_link, email_notification=False, enabled=True) + new_feed = Feed(title=title, description=description, + link=link, site_link=site_link, + email_notification=False, enabled=True) user.feeds.append(new_feed) nb += 1 @@ -128,13 +130,15 @@ def import_opml(email, opml_file): db.session.commit() return nb + def clean_url(url): """ Remove utm_* parameters """ parsed_url = urlparse(url) qd = parse_qs(parsed_url.query, keep_blank_values=True) - filtered = dict((k, v) for k, v in qd.iteritems() if not k.startswith('utm_')) + filtered = dict((k, v) for k, v in qd.iteritems() + if not k.startswith('utm_')) nice_url = urlunparse([ parsed_url.scheme, parsed_url.netloc, @@ -145,6 +149,7 @@ def clean_url(url): ]) return nice_url + def open_url(url): """ Open an URL with the proxy and the user-agent @@ -175,6 +180,7 @@ def open_url(url): #pyaggr3g470r_log.error(url + " " + str(e.reason.errno) + " " + e.reason.strerror) return (False, error) + def clear_string(data): """ Clear a string by removing HTML tags, HTML special caracters @@ -184,6 +190,7 @@ def clear_string(data): q = re.compile('\s') # consecutive white spaces return p.sub('', q.sub(' ', data)) + def load_stop_words(): """ Load the stop words and return them in a list. @@ -199,6 +206,7 @@ def load_stop_words(): stop_words += stop_wods_file.read().split(";") return stop_words + def top_words(articles, n=10, size=5): """ Return the n most frequent words in a list. @@ -213,6 +221,7 @@ def top_words(articles, n=10, size=5): words[word] += 1 return words.most_common(n) + def tag_cloud(tags): """ Generates a tags cloud. @@ -222,6 +231,7 @@ def tag_cloud(tags): (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word, format(count, ',d'), word)) \ for (word, count) in tags]) + def search_feed(url): """ Search a feed in a HTML page. 
diff --git a/pyaggr3g470r/views.py b/pyaggr3g470r/views.py
index 514d1920..02ef4f9e 100644
--- a/pyaggr3g470r/views.py
+++ b/pyaggr3g470r/views.py
@@ -38,7 +38,6 @@ from werkzeug import generate_password_hash
 import conf
 import utils
 import export
-import models
 if not conf.ON_HEROKU:
     import search as fastsearch
 from forms import SigninForm, AddFeedForm, ProfileForm
@@ -103,7 +102,7 @@ def page_not_found(e):
     return render_template('errors/404.html'), 404
 
 @app.errorhandler(500)
-def page_not_found(e):
+def internal_server_error(e):
     return render_template('errors/500.html'), 500
 
@@ -166,7 +165,7 @@
                 new_feed.id = feed.id
                 new_feed.title = feed.title
                 new_feed.enabled = feed.enabled
-                new_feed.articles = Article.query.filter(Article.user_id == g.user.id, 
+                new_feed.articles = Article.query.filter(Article.user_id == g.user.id,
                                                          Article.feed_id == feed.id).order_by(desc("Article.date")).limit(9)
                 result.append(new_feed)
         unread_articles = len(Article.query.filter(Article.user_id == g.user.id, Article.readed == False).all())
@@ -247,7 +246,7 @@
         return render_template('article.html', head_title=utils.clear_string(article.title), article=article)
     flash("This article do not exist.", 'warning')
     return redirect(redirect_url())
-    
+
 
 @app.route('/mark_as_read/', methods=['GET'])
 @app.route('/mark_as_read/<int:feed_id>', methods=['GET'])
@@ -473,7 +472,7 @@
         data = request.files.get('opmlfile', None)
         if None == data or not allowed_file(data.filename):
             flash('File not allowed.', 'danger')
-        else: 
+        else:
             opml_path = os.path.join("./pyaggr3g470r/var/", data.filename)
             data.save(opml_path)
             try:
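The rename in views.py matters because the module defined `page_not_found` twice. Flask registers the function object at decoration time, so both handlers worked, but the second `def` rebound the module-level name and is flagged by linters such as pyflakes. With distinct names the intent is clear; a minimal sketch, assuming the app's error templates exist:

```python
from flask import Flask, render_template

app = Flask(__name__)

@app.errorhandler(404)
def page_not_found(e):
    return render_template('errors/404.html'), 404

@app.errorhandler(500)
def internal_server_error(e):
    # A distinct name: reusing page_not_found would shadow the 404
    # handler's name at module level, even though Flask keeps both
    # handlers registered.
    return render_template('errors/500.html'), 500
```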