aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCédric Bonhomme <cedric@cedricbonhomme.org>2015-03-08 12:07:36 +0100
committerCédric Bonhomme <cedric@cedricbonhomme.org>2015-03-08 12:07:36 +0100
commit2378de49ba37116c5bf93054fd6aed65fa44022a (patch)
tree90bb6efddb1d8fc4772c74fcf5dda4dccef74b1a
parentBetter handling of the error logging in the crawler. (diff)
downloadnewspipe-2378de49ba37116c5bf93054fd6aed65fa44022a.tar.gz
newspipe-2378de49ba37116c5bf93054fd6aed65fa44022a.tar.bz2
newspipe-2378de49ba37116c5bf93054fd6aed65fa44022a.zip
Moved duplicate() function into utils.py. Some minor cosmetic changes.
-rw-r--r--bootstrap.py7
-rw-r--r--pyaggr3g470r/duplicate.py20
-rwxr-xr-xpyaggr3g470r/utils.py13
-rw-r--r--pyaggr3g470r/views/api/article.py3
-rw-r--r--pyaggr3g470r/views/api/common.py5
-rw-r--r--pyaggr3g470r/views/api/feed.py9
-rw-r--r--pyaggr3g470r/views/article.py3
-rw-r--r--pyaggr3g470r/views/feed.py3
-rw-r--r--pyaggr3g470r/views/views.py4
9 files changed, 35 insertions, 32 deletions
diff --git a/bootstrap.py b/bootstrap.py
index 5cfd2250..671552b3 100644
--- a/bootstrap.py
+++ b/bootstrap.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -
+
# required imports and code exection for basic functionning
import os
@@ -6,7 +9,7 @@ import logging
def set_logging(log_path, log_level=logging.INFO,
log_format='%(asctime)s %(levelname)s %(message)s'):
- logger = logging.getLogger('pyaggr3g470r')
+ logger = logging.getLogger('pyAggr3g470r')
formater = logging.Formatter(log_format)
handler = logging.FileHandler(log_path)
handler.setFormatter(formater)
@@ -17,7 +20,7 @@ from flask import Flask
from flask.ext.sqlalchemy import SQLAlchemy
# Create Flask application
-application = Flask('pyaggr3g470r')
+application = Flask('pyAggr3g470r')
application.debug = conf.WEBSERVER_DEBUG
set_logging(conf.LOG_PATH, log_level=logging.DEBUG if conf.WEBSERVER_DEBUG
else logging.INFO)
diff --git a/pyaggr3g470r/duplicate.py b/pyaggr3g470r/duplicate.py
deleted file mode 100644
index d4c6e31a..00000000
--- a/pyaggr3g470r/duplicate.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-import itertools
-from datetime import timedelta
-
-from pyaggr3g470r import utils
-
-def compare_documents(feed):
- """
- Compare a list of documents by pair.
- """
- duplicates = []
- for pair in itertools.combinations(feed.articles, 2):
- date1 = pair[0].date
- date2 = pair[1].date
- if utils.clear_string(pair[0].title) == utils.clear_string(pair[1].title) and \
- (date1 - date2) < timedelta(days = 1):
- duplicates.append(pair)
- return duplicates \ No newline at end of file
diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py
index 3ed89f55..ba440c78 100755
--- a/pyaggr3g470r/utils.py
+++ b/pyaggr3g470r/utils.py
@@ -41,12 +41,14 @@ import logging
import datetime
import operator
import urllib
+import itertools
import subprocess
try:
from urlparse import urlparse, parse_qs, urlunparse
except:
from urllib.parse import urlparse, parse_qs, urlunparse
from bs4 import BeautifulSoup
+from datetime import timedelta
from collections import Counter
from contextlib import contextmanager
@@ -283,6 +285,17 @@ def tag_cloud(tags):
(min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word, format(count, ',d'), word)) \
for (word, count) in tags])
+def compare_documents(feed):
+ """
+ Compare a list of documents by pair.
+ """
+ duplicates = []
+ for pair in itertools.combinations(feed.articles, 2):
+ date1, date2 = pair[0].date, pair[1].date
+ if clear_string(pair[0].title) == clear_string(pair[1].title) and \
+ (date1 - date2) < timedelta(days = 1):
+ duplicates.append(pair)
+ return duplicates
def search_feed(url):
"""
diff --git a/pyaggr3g470r/views/api/article.py b/pyaggr3g470r/views/api/article.py
index 17881412..c3ec2d34 100644
--- a/pyaggr3g470r/views/api/article.py
+++ b/pyaggr3g470r/views/api/article.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -
+
from flask import g
import dateutil.parser
diff --git a/pyaggr3g470r/views/api/common.py b/pyaggr3g470r/views/api/common.py
index bfdc7860..856b4bb9 100644
--- a/pyaggr3g470r/views/api/common.py
+++ b/pyaggr3g470r/views/api/common.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -
+
"""For a given resources, classes in the module intend to create the following
routes :
GET resource/<id>
@@ -54,13 +57,11 @@ def authenticate(func):
and user.activation_key == "":
g.user = user
logged_in = True
-
if logged_in:
return func(*args, **kwargs)
raise Unauthorized({'WWWAuthenticate': 'Basic realm="Login Required"'})
return wrapper
-
def to_response(func):
"""Will cast results of func as a result, and try to extract
a status_code for the Response object"""
diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py
index 0d83ea43..7d0e2862 100644
--- a/pyaggr3g470r/views/api/feed.py
+++ b/pyaggr3g470r/views/api/feed.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -
+
from flask import g
from pyaggr3g470r.controllers.feed import FeedController, \
@@ -8,7 +11,6 @@ from pyaggr3g470r.views.api.common import PyAggAbstractResource, \
PyAggResourceExisting, \
PyAggResourceMulti
-
FEED_ATTRS = {'title': {'type': str},
'description': {'type': str},
'link': {'type': str},
@@ -20,25 +22,21 @@ FEED_ATTRS = {'title': {'type': str},
'last_error': {'type': str},
'error_count': {'type': int, 'default': 0}}
-
class FeedNewAPI(PyAggResourceNew):
controller_cls = FeedController
attrs = FEED_ATTRS
to_date = ['date', 'last_retrieved']
-
class FeedAPI(PyAggResourceExisting):
controller_cls = FeedController
attrs = FEED_ATTRS
to_date = ['date', 'last_retrieved']
-
class FeedsAPI(PyAggResourceMulti):
controller_cls = FeedController
attrs = FEED_ATTRS
to_date = ['date', 'last_retrieved']
-
class FetchableFeedAPI(PyAggAbstractResource):
controller_cls = FeedController
to_date = ['date', 'last_retrieved']
@@ -49,7 +47,6 @@ class FetchableFeedAPI(PyAggAbstractResource):
return [feed for feed in self.controller.list_fetchable(
**self.reqparse_args())]
-
g.api.add_resource(FeedNewAPI, '/feed', endpoint='feed_new.json')
g.api.add_resource(FeedAPI, '/feed/<int:obj_id>', endpoint='feed.json')
g.api.add_resource(FeedsAPI, '/feeds', endpoint='feeds.json')
diff --git a/pyaggr3g470r/views/article.py b/pyaggr3g470r/views/article.py
index a209f888..08c92686 100644
--- a/pyaggr3g470r/views/article.py
+++ b/pyaggr3g470r/views/article.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -
+
from flask import Blueprint, g, render_template, redirect
from sqlalchemy import desc
diff --git a/pyaggr3g470r/views/feed.py b/pyaggr3g470r/views/feed.py
index 2af502a7..2e39ee4b 100644
--- a/pyaggr3g470r/views/feed.py
+++ b/pyaggr3g470r/views/feed.py
@@ -1,3 +1,6 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -
+
from datetime import datetime
from flask import Blueprint, g, render_template
diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py
index 0f1f8765..9d368c42 100644
--- a/pyaggr3g470r/views/views.py
+++ b/pyaggr3g470r/views/views.py
@@ -47,7 +47,7 @@ from sqlalchemy.exc import IntegrityError
from werkzeug import generate_password_hash
import conf
-from pyaggr3g470r import utils, notifications, export, duplicate
+from pyaggr3g470r import utils, notifications, export
from pyaggr3g470r.models import User, Feed, Article, Role
from pyaggr3g470r.decorators import feed_access_required
from pyaggr3g470r.forms import SignupForm, SigninForm, AddFeedForm, \
@@ -399,7 +399,7 @@ def duplicates(feed_id=None):
"""
feed = Feed.query.filter(Feed.user_id == g.user.id, Feed.id == feed_id).first()
duplicates = []
- duplicates = duplicate.compare_documents(feed)
+ duplicates = utils.compare_documents(feed)
return render_template('duplicates.html', duplicates=duplicates, feed=feed)
@app.route('/index_database', methods=['GET'])
bgstack15