From 71b185bf1984080077937a814a0d19d70faf2f77 Mon Sep 17 00:00:00 2001
From: François Schmidts
Date: Fri, 8 Jan 2016 16:30:09 +0100
Subject: using user agent in web crawler

---
 src/conf.py            | 3 +--
 src/web/lib/crawler.py | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/conf.py b/src/conf.py
index a3e7e3bb..7db65fd1 100644
--- a/src/conf.py
+++ b/src/conf.py
@@ -33,8 +33,7 @@ DEFAULTS = {"platform_url": "https://JARR.herokuapp.com/",
             "default_max_error": "3",
             "log_path": "jarr.log",
             "log_level": "info",
-            "user_agent": "JARR "
-                          "(https://github.com/JARR-aggregator)",
+            "user_agent": "JARR (https://github.com/JARR-aggregator)",
             "resolve_article_url": "false",
             "http_proxy": "",
             "secret": "",
diff --git a/src/web/lib/crawler.py b/src/web/lib/crawler.py
index 90a268e8..979ccbfc 100644
--- a/src/web/lib/crawler.py
+++ b/src/web/lib/crawler.py
@@ -52,7 +52,7 @@ class AbstractCrawler:
                 auth=self.auth,
                 data=json.dumps(data, default=default_handler),
                 headers={'Content-Type': 'application/json',
-                         'User-Agent': 'jarr'})
+                         'User-Agent': conf.USER_AGENT})
 
     def wait(self, max_wait=300, checks=5, wait_for=2):
         checked, second_waited = 0, 0
@@ -217,7 +217,7 @@ class CrawlerScheduler(AbstractCrawler):
 
     def prepare_headers(self, feed):
         """For a known feed, will construct some header dictionnary"""
-        headers = {'User-Agent': 'jarr/crawler'}
+        headers = {'User-Agent': conf.USER_AGENT}
        if feed.get('last_modified'):
             headers['If-Modified-Since'] = feed['last_modified']
         if feed.get('etag') and 'pyagg' not in feed['etag']:
-- 
cgit
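
The patch replaces the hard-coded 'jarr' and 'jarr/crawler' strings with conf.USER_AGENT, whose default value is the "user_agent" entry of DEFAULTS in src/conf.py. Below is a minimal sketch of how a conf module can promote such defaults to module-level constants like USER_AGENT; the loading loop and the environment-variable override are assumptions for illustration, not taken from JARR's actual conf.py.

# Sketch only: one plausible way a conf module can expose DEFAULTS entries
# such as "user_agent" as upper-case module attributes (conf.USER_AGENT).
# The environment-variable override is an assumed detail, not JARR's code.
import os

DEFAULTS = {
    "user_agent": "JARR (https://github.com/JARR-aggregator)",
    "log_level": "info",
}

for _key, _default in DEFAULTS.items():
    # DEFAULTS["user_agent"] becomes the module attribute USER_AGENT,
    # optionally overridden by an environment variable of the same name.
    globals()[_key.upper()] = os.environ.get(_key.upper(), _default)

With the patch applied, every request the crawler sends (both to the JARR API and to remote feeds) carries that single configurable string, e.g. headers = {'User-Agent': conf.USER_AGENT}, so a deployment can change how it identifies itself without touching the crawler code.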