diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-03-08 12:07:36 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-03-08 12:07:36 +0100 |
commit | 2378de49ba37116c5bf93054fd6aed65fa44022a (patch) | |
tree | 90bb6efddb1d8fc4772c74fcf5dda4dccef74b1a /pyaggr3g470r | |
parent | Better handling of the error logging in the crawler. (diff) | |
download | newspipe-2378de49ba37116c5bf93054fd6aed65fa44022a.tar.gz newspipe-2378de49ba37116c5bf93054fd6aed65fa44022a.tar.bz2 newspipe-2378de49ba37116c5bf93054fd6aed65fa44022a.zip |
Moved the compare_documents() function from duplicate.py into utils.py. Some minor cosmetic changes.
Diffstat (limited to 'pyaggr3g470r')
-rw-r--r-- | pyaggr3g470r/duplicate.py | 20 | ||||
-rwxr-xr-x | pyaggr3g470r/utils.py | 13 | ||||
-rw-r--r-- | pyaggr3g470r/views/api/article.py | 3 | ||||
-rw-r--r-- | pyaggr3g470r/views/api/common.py | 5 | ||||
-rw-r--r-- | pyaggr3g470r/views/api/feed.py | 9 | ||||
-rw-r--r-- | pyaggr3g470r/views/article.py | 3 | ||||
-rw-r--r-- | pyaggr3g470r/views/feed.py | 3 | ||||
-rw-r--r-- | pyaggr3g470r/views/views.py | 4 |
8 files changed, 30 insertions, 30 deletions
diff --git a/pyaggr3g470r/duplicate.py b/pyaggr3g470r/duplicate.py deleted file mode 100644 index d4c6e31a..00000000 --- a/pyaggr3g470r/duplicate.py +++ /dev/null @@ -1,20 +0,0 @@ -#! /usr/bin/env python -#-*- coding: utf-8 -*- - -import itertools -from datetime import timedelta - -from pyaggr3g470r import utils - -def compare_documents(feed): - """ - Compare a list of documents by pair. - """ - duplicates = [] - for pair in itertools.combinations(feed.articles, 2): - date1 = pair[0].date - date2 = pair[1].date - if utils.clear_string(pair[0].title) == utils.clear_string(pair[1].title) and \ - (date1 - date2) < timedelta(days = 1): - duplicates.append(pair) - return duplicates
\ No newline at end of file diff --git a/pyaggr3g470r/utils.py b/pyaggr3g470r/utils.py index 3ed89f55..ba440c78 100755 --- a/pyaggr3g470r/utils.py +++ b/pyaggr3g470r/utils.py @@ -41,12 +41,14 @@ import logging import datetime import operator import urllib +import itertools import subprocess try: from urlparse import urlparse, parse_qs, urlunparse except: from urllib.parse import urlparse, parse_qs, urlunparse from bs4 import BeautifulSoup +from datetime import timedelta from collections import Counter from contextlib import contextmanager @@ -283,6 +285,17 @@ def tag_cloud(tags): (min(1 + count * 7 / max([tag[1] for tag in tags]), 7), word, format(count, ',d'), word)) \ for (word, count) in tags]) +def compare_documents(feed): + """ + Compare a list of documents by pair. + """ + duplicates = [] + for pair in itertools.combinations(feed.articles, 2): + date1, date2 = pair[0].date, pair[1].date + if clear_string(pair[0].title) == clear_string(pair[1].title) and \ + (date1 - date2) < timedelta(days = 1): + duplicates.append(pair) + return duplicates def search_feed(url): """ diff --git a/pyaggr3g470r/views/api/article.py b/pyaggr3g470r/views/api/article.py index 17881412..c3ec2d34 100644 --- a/pyaggr3g470r/views/api/article.py +++ b/pyaggr3g470r/views/api/article.py @@ -1,3 +1,6 @@ +#! /usr/bin/env python +# -*- coding: utf-8 - + from flask import g import dateutil.parser diff --git a/pyaggr3g470r/views/api/common.py b/pyaggr3g470r/views/api/common.py index bfdc7860..856b4bb9 100644 --- a/pyaggr3g470r/views/api/common.py +++ b/pyaggr3g470r/views/api/common.py @@ -1,3 +1,6 @@ +#! 
/usr/bin/env python +# -*- coding: utf-8 - + """For a given resources, classes in the module intend to create the following routes : GET resource/<id> @@ -54,13 +57,11 @@ def authenticate(func): and user.activation_key == "": g.user = user logged_in = True - if logged_in: return func(*args, **kwargs) raise Unauthorized({'WWWAuthenticate': 'Basic realm="Login Required"'}) return wrapper - def to_response(func): """Will cast results of func as a result, and try to extract a status_code for the Response object""" diff --git a/pyaggr3g470r/views/api/feed.py b/pyaggr3g470r/views/api/feed.py index 0d83ea43..7d0e2862 100644 --- a/pyaggr3g470r/views/api/feed.py +++ b/pyaggr3g470r/views/api/feed.py @@ -1,3 +1,6 @@ +#! /usr/bin/env python +# -*- coding: utf-8 - + from flask import g from pyaggr3g470r.controllers.feed import FeedController, \ @@ -8,7 +11,6 @@ from pyaggr3g470r.views.api.common import PyAggAbstractResource, \ PyAggResourceExisting, \ PyAggResourceMulti - FEED_ATTRS = {'title': {'type': str}, 'description': {'type': str}, 'link': {'type': str}, @@ -20,25 +22,21 @@ FEED_ATTRS = {'title': {'type': str}, 'last_error': {'type': str}, 'error_count': {'type': int, 'default': 0}} - class FeedNewAPI(PyAggResourceNew): controller_cls = FeedController attrs = FEED_ATTRS to_date = ['date', 'last_retrieved'] - class FeedAPI(PyAggResourceExisting): controller_cls = FeedController attrs = FEED_ATTRS to_date = ['date', 'last_retrieved'] - class FeedsAPI(PyAggResourceMulti): controller_cls = FeedController attrs = FEED_ATTRS to_date = ['date', 'last_retrieved'] - class FetchableFeedAPI(PyAggAbstractResource): controller_cls = FeedController to_date = ['date', 'last_retrieved'] @@ -49,7 +47,6 @@ class FetchableFeedAPI(PyAggAbstractResource): return [feed for feed in self.controller.list_fetchable( **self.reqparse_args())] - g.api.add_resource(FeedNewAPI, '/feed', endpoint='feed_new.json') g.api.add_resource(FeedAPI, '/feed/<int:obj_id>', endpoint='feed.json') 
g.api.add_resource(FeedsAPI, '/feeds', endpoint='feeds.json') diff --git a/pyaggr3g470r/views/article.py b/pyaggr3g470r/views/article.py index a209f888..08c92686 100644 --- a/pyaggr3g470r/views/article.py +++ b/pyaggr3g470r/views/article.py @@ -1,3 +1,6 @@ +#! /usr/bin/env python +# -*- coding: utf-8 - + from flask import Blueprint, g, render_template, redirect from sqlalchemy import desc diff --git a/pyaggr3g470r/views/feed.py b/pyaggr3g470r/views/feed.py index 2af502a7..2e39ee4b 100644 --- a/pyaggr3g470r/views/feed.py +++ b/pyaggr3g470r/views/feed.py @@ -1,3 +1,6 @@ +#! /usr/bin/env python +# -*- coding: utf-8 - + from datetime import datetime from flask import Blueprint, g, render_template diff --git a/pyaggr3g470r/views/views.py b/pyaggr3g470r/views/views.py index 0f1f8765..9d368c42 100644 --- a/pyaggr3g470r/views/views.py +++ b/pyaggr3g470r/views/views.py @@ -47,7 +47,7 @@ from sqlalchemy.exc import IntegrityError from werkzeug import generate_password_hash import conf -from pyaggr3g470r import utils, notifications, export, duplicate +from pyaggr3g470r import utils, notifications, export from pyaggr3g470r.models import User, Feed, Article, Role from pyaggr3g470r.decorators import feed_access_required from pyaggr3g470r.forms import SignupForm, SigninForm, AddFeedForm, \ @@ -399,7 +399,7 @@ def duplicates(feed_id=None): """ feed = Feed.query.filter(Feed.user_id == g.user.id, Feed.id == feed_id).first() duplicates = [] - duplicates = duplicate.compare_documents(feed) + duplicates = utils.compare_documents(feed) return render_template('duplicates.html', duplicates=duplicates, feed=feed) @app.route('/index_database', methods=['GET']) |