From 9f1120df8e76e90368e888d1f5791defc7204d2f Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Sun, 8 Sep 2013 11:04:47 +0200 Subject: Added a function which opens a URL safely. --- source/feedgetter.py | 2 +- source/utils.py | 47 ++++++++++++++++++++++++----------------------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/source/feedgetter.py b/source/feedgetter.py index a27a068c..6be8b28d 100755 --- a/source/feedgetter.py +++ b/source/feedgetter.py @@ -107,7 +107,7 @@ class FeedGetter(object): Executed in a thread. """ - if utils.detect_url_errors([the_good_url]) == []: + if utils.open_url(the_good_url)[0] == True: # if ressource is available add the articles in the base. self.add_into_database(the_good_url, feed_original) diff --git a/source/utils.py b/source/utils.py index 173b08cd..e2bf69ab 100755 --- a/source/utils.py +++ b/source/utils.py @@ -80,33 +80,31 @@ def opened_w_error(filename, mode="r"): finally: f.close() -def detect_url_errors(list_of_urls): +def open_url(url): """ - Detect URL errors. - Return a list of error(s). + Open an URL with proxy and the user-agent + specified in the configuration file. """ - errors = [] if conf.HTTP_PROXY == "": proxy = {} else: proxy = {"http" : conf.HTTP_PROXY} opener = urllib.request.FancyURLopener(proxy) - for url in list_of_urls: - try: - opener = urllib.request.build_opener() - opener.addheaders = [('User-agent', conf.USER_AGENT)] - opener.open(url) - except urllib.error.HTTPError as e: - # server couldn't fulfill the request - errors.append((url, e.code, \ - http.server.BaseHTTPRequestHandler.responses[e.code][1])) - except urllib.error.URLError as e: - # failed to reach the server - if type(e.reason) == str: - errors.append((url, e.reason, e.reason)) - else: - errors.append((url, e.reason.errno, e.reason.strerror)) - return errors + try: + opener = urllib.request.build_opener() + opener.addheaders = [('User-agent', conf.USER_AGENT)] + return (True, opener.open(url)) + except urllib.error.HTTPError as e: + # server couldn't fulfill the request + errors.append((url, e.code, \ + http.server.BaseHTTPRequestHandler.responses[e.code][1])) + except urllib.error.URLError as e: + # failed to reach the server + if type(e.reason) == str: + errors.append((url, e.reason, e.reason)) + else: + errors.append((url, e.reason.errno, e.reason.strerror)) + return (False, errors) def generate_qr_code(article): """ @@ -291,10 +289,13 @@ def search_feed(url): """ Search a feed in a HTML page. """ - soup = None + soup, page = None, None try: - req = urllib.request.Request(url, headers={'User-Agent' : conf.USER_AGENT}) - page = urllib.request.urlopen(req) + result = open_url(url) + if result[0] == True: + page = open_url(url)[1] + else: + return None soup = BeautifulSoup(page) except: return None -- cgit