From b1d92793268f9db737837c0899272d576c45c537 Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Fri, 15 Jan 2016 14:39:49 +0100
Subject: fixing logging

---
 src/web/lib/crawler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/web/lib')

diff --git a/src/web/lib/crawler.py b/src/web/lib/crawler.py
index 979ccbfc..7343ea4d 100644
--- a/src/web/lib/crawler.py
+++ b/src/web/lib/crawler.py
@@ -155,9 +155,9 @@ class FeedCrawler(AbstractCrawler):
             response.raise_for_status()
         except Exception as error:
             error_count = self.feed['error_count'] + 1
-            logger.error('%r %r - an error occured while fetching '
-                         'feed; bumping error count to %r', self.feed['id'],
-                         self.feed['title'], error_count)
+            logger.exception('%r %r - an error occured while fetching '
+                             'feed; bumping error count to %r',
+                             self.feed['id'], self.feed['title'], error_count)
         future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
                                   {'error_count': error_count,
                                    'last_error': str(error),
--
cgit

From 462f6d3b21558ed0a283c24e0e0332eac6ccbbb3 Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Fri, 11 Sep 2015 18:28:12 +0200
Subject: base modification in model for category support

---
 src/web/lib/crawler.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'src/web/lib')

diff --git a/src/web/lib/crawler.py b/src/web/lib/crawler.py
index 7343ea4d..f480fe96 100644
--- a/src/web/lib/crawler.py
+++ b/src/web/lib/crawler.py
@@ -18,7 +18,6 @@ import json
 import logging
 import feedparser
 from datetime import datetime, timedelta
-from functools import wraps
 from time import strftime, gmtime
 from concurrent.futures import ThreadPoolExecutor
 from requests_futures.sessions import FuturesSession
@@ -132,7 +131,7 @@ class PyAggUpdater(AbstractCrawler):
                      {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
                       for key in up_feed
                       if up_feed[key] != self.feed.get(key)})
-        future = self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
+        self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
 
 
 class FeedCrawler(AbstractCrawler):
@@ -144,8 +143,8 @@ class FeedCrawler(AbstractCrawler):
     def clean_feed(self):
         """Will reset the errors counters on a feed that have known errors"""
         if self.feed.get('error_count') or self.feed.get('last_error'):
-            future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
-                                      {'error_count': 0, 'last_error': ''})
+            self.query_pyagg('put', 'feed/%d' % self.feed['id'],
+                             {'error_count': 0, 'last_error': ''})
 
     def callback(self, response):
         """will fetch the feed and interprete results (304, etag) or will
--
cgit

From 5b7db9398abaacea241d9fcce7885457c562d7fa Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Sun, 11 Oct 2015 12:18:07 +0200
Subject: a bit of cleaning, putting code where it belongs

---
 src/web/lib/utils.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'src/web/lib')

diff --git a/src/web/lib/utils.py b/src/web/lib/utils.py
index aa552a12..88d24ba5 100644
--- a/src/web/lib/utils.py
+++ b/src/web/lib/utils.py
@@ -1,8 +1,10 @@
+import re
 import types
 import urllib
 import logging
 import requests
 from hashlib import md5
+from flask import request, url_for
 
 logger = logging.getLogger(__name__)
 
@@ -55,3 +57,17 @@ def try_get_icon_url(url, *splits):
 def to_hash(text):
     return md5(text.encode('utf8') if hasattr(text, 'encode') else text)\
         .hexdigest()
+
+
+def clear_string(data):
+    """
+    Clear a string by removing HTML tags, HTML special caracters
+    and consecutive white spaces (more that one).
+    """
+    p = re.compile('<[^>]+>')  # HTML tags
+    q = re.compile('\s')  # consecutive white spaces
+    return p.sub('', q.sub(' ', data))
+
+
+def redirect_url(default='home'):
+    return request.args.get('next') or request.referrer or url_for(default)
--
cgit

From 5949bd6787f2a7fa910dea6036df0315e119b457 Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Sun, 31 Jan 2016 18:02:52 +0100
Subject: modal from nav

---
 src/web/lib/feed_utils.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'src/web/lib')

diff --git a/src/web/lib/feed_utils.py b/src/web/lib/feed_utils.py
index 14e6b82b..80800bec 100644
--- a/src/web/lib/feed_utils.py
+++ b/src/web/lib/feed_utils.py
@@ -9,6 +9,8 @@ from web.lib.utils import try_keys, try_get_icon_url, rebuild_url
 
 logger = logging.getLogger(__name__)
 logging.captureWarnings(True)
+ACCEPTED_MIMETYPES = ('application/rss+xml', 'application/rdf+xml',
+                      'application/atom+xml', 'application/xml', 'text/xml')
 
 
 def is_parsing_ok(parsed_feed):
@@ -96,8 +98,11 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
             del feed['icon_url']
 
     if not feed.get('link'):
-        alternates = bs_parsed.find_all(check_keys(rel=['alternate'],
-                                        type=['application/rss+xml']))
-        if len(alternates) >= 1:
-            feed['link'] = rebuild_url(alternates[0].attrs['href'], feed_split)
+        for type_ in ACCEPTED_MIMETYPES:
+            alternates = bs_parsed.find_all(check_keys(
+                    rel=['alternate'], type=[type_]))
+            if len(alternates) >= 1:
+                feed['link'] = rebuild_url(alternates[0].attrs['href'],
+                                           feed_split)
+                break
     return feed
--
cgit