aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: François Schmidts <francois.schmidts@gmail.com>, 2016-01-08 16:30:09 +0100
committer: François Schmidts <francois.schmidts@gmail.com>, 2016-01-11 09:52:39 +0100
commit: 71b185bf1984080077937a814a0d19d70faf2f77 (patch)
tree: a4a1e9321121de05d0ae13075a34cffad58ca9ee
parent: Moved the crawler in the parent folder. (diff)
download: newspipe-71b185bf1984080077937a814a0d19d70faf2f77.tar.gz
newspipe-71b185bf1984080077937a814a0d19d70faf2f77.tar.bz2
newspipe-71b185bf1984080077937a814a0d19d70faf2f77.zip
using user agent in web crawler
-rw-r--r--  src/conf.py  3
-rw-r--r--  src/web/lib/crawler.py  4
2 files changed, 3 insertions, 4 deletions
diff --git a/src/conf.py b/src/conf.py
index a3e7e3bb..7db65fd1 100644
--- a/src/conf.py
+++ b/src/conf.py
@@ -33,8 +33,7 @@ DEFAULTS = {"platform_url": "https://JARR.herokuapp.com/",
"default_max_error": "3",
"log_path": "jarr.log",
"log_level": "info",
- "user_agent": "JARR "
- "(https://github.com/JARR-aggregator)",
+ "user_agent": "JARR (https://github.com/JARR-aggregator)",
"resolve_article_url": "false",
"http_proxy": "",
"secret": "",
diff --git a/src/web/lib/crawler.py b/src/web/lib/crawler.py
index 90a268e8..979ccbfc 100644
--- a/src/web/lib/crawler.py
+++ b/src/web/lib/crawler.py
@@ -52,7 +52,7 @@ class AbstractCrawler:
auth=self.auth, data=json.dumps(data,
default=default_handler),
headers={'Content-Type': 'application/json',
- 'User-Agent': 'jarr'})
+ 'User-Agent': conf.USER_AGENT})
def wait(self, max_wait=300, checks=5, wait_for=2):
checked, second_waited = 0, 0
@@ -217,7 +217,7 @@ class CrawlerScheduler(AbstractCrawler):
def prepare_headers(self, feed):
"""For a known feed, will construct some header dictionnary"""
- headers = {'User-Agent': 'jarr/crawler'}
+ headers = {'User-Agent': conf.USER_AGENT}
if feed.get('last_modified'):
headers['If-Modified-Since'] = feed['last_modified']
if feed.get('etag') and 'pyagg' not in feed['etag']:
bgstack15