Various improvements to the crawler (better use of coroutines, test if an article should be updated). Tags are now retrieved for the k-means clustering (previously achieved with the content of articles).

author: Cédric Bonhomme <cedric@cedricbonhomme.org> 2016-11-08 14:39:47 +0100
committer: Cédric Bonhomme <cedric@cedricbonhomme.org> 2016-11-08 14:39:47 +0100
commit: 2d72f44a90a76fe7450e59fdfdf4d42f44b9cd96 (patch)
tree: 39895c10f68cf0b13d957073268769d04aa924a0 /src/web/lib/feed_utils.py
parent: Closes section HTML tag. (diff)
download: newspipe-2d72f44a90a76fe7450e59fdfdf4d42f44b9cd96.tar.gz
newspipe-2d72f44a90a76fe7450e59fdfdf4d42f44b9cd96.tar.bz2
newspipe-2d72f44a90a76fe7450e59fdfdf4d42f44b9cd96.zip
1 files changed, 3 insertions, 2 deletions
diff --git a/src/web/lib/feed_utils.py b/src/web/lib/feed_utils.py
index 9925613f..94ae6e53 100644
--- a/src/web/lib/feed_utils.py
+++ b/src/web/lib/feed_utils.py
@@ -3,7 +3,7 @@ import urllib
 import logging
 import requests
 import feedparser
-from conf import USER_AGENT
+from conf import CRAWLER_USER_AGENT
 from bs4 import BeautifulSoup, SoupStrainer
 
 from web.lib.utils import try_keys, try_get_icon_url, rebuild_url
@@ -32,7 +32,8 @@ def escape_keys(*keys):
 
 @escape_keys('title', 'description')
 def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
-    requests_kwargs = {'headers': {'User-Agent': USER_AGENT}, 'verify': False}
+    requests_kwargs = {'headers': {'User-Agent': CRAWLER_USER_AGENT},
+                        'verify': False}
     if url is None and fp_parsed is not None:
         url = fp_parsed.get('url')
     if url is not None and fp_parsed is None:
author	Cédric Bonhomme <cedric@cedricbonhomme.org>	2016-11-08 14:39:47 +0100
committer	Cédric Bonhomme <cedric@cedricbonhomme.org>	2016-11-08 14:39:47 +0100
commit	2d72f44a90a76fe7450e59fdfdf4d42f44b9cd96 (patch)
tree	39895c10f68cf0b13d957073268769d04aa924a0 /src/web/lib/feed_utils.py
parent	Closes section HTML tag. (diff)
download	newspipe-2d72f44a90a76fe7450e59fdfdf4d42f44b9cd96.tar.gz newspipe-2d72f44a90a76fe7450e59fdfdf4d42f44b9cd96.tar.bz2 newspipe-2d72f44a90a76fe7450e59fdfdf4d42f44b9cd96.zip