aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCédric Bonhomme <kimble.mandel@gmail.com>2013-09-08 11:04:47 +0200
committerCédric Bonhomme <kimble.mandel@gmail.com>2013-09-08 11:04:47 +0200
commit9f1120df8e76e90368e888d1f5791defc7204d2f (patch)
treeee778e394f8446a2482d126c177d86459a2925f9
parentdetect_url_errors() now uses the proxy. (diff)
downloadnewspipe-9f1120df8e76e90368e888d1f5791defc7204d2f.tar.gz
newspipe-9f1120df8e76e90368e888d1f5791defc7204d2f.tar.bz2
newspipe-9f1120df8e76e90368e888d1f5791defc7204d2f.zip
Added a function which opens a URL safely.
-rwxr-xr-xsource/feedgetter.py2
-rwxr-xr-xsource/utils.py47
2 files changed, 25 insertions, 24 deletions
diff --git a/source/feedgetter.py b/source/feedgetter.py
index a27a068c..6be8b28d 100755
--- a/source/feedgetter.py
+++ b/source/feedgetter.py
@@ -107,7 +107,7 @@ class FeedGetter(object):
Executed in a thread.
"""
- if utils.detect_url_errors([the_good_url]) == []:
+ if utils.open_url(the_good_url)[0] == True:
# if ressource is available add the articles in the base.
self.add_into_database(the_good_url, feed_original)
diff --git a/source/utils.py b/source/utils.py
index 173b08cd..e2bf69ab 100755
--- a/source/utils.py
+++ b/source/utils.py
@@ -80,33 +80,31 @@ def opened_w_error(filename, mode="r"):
finally:
f.close()
-def detect_url_errors(list_of_urls):
+def open_url(url):
"""
- Detect URL errors.
- Return a list of error(s).
+ Open an URL with proxy and the user-agent
+ specified in the configuration file.
"""
- errors = []
if conf.HTTP_PROXY == "":
proxy = {}
else:
proxy = {"http" : conf.HTTP_PROXY}
opener = urllib.request.FancyURLopener(proxy)
- for url in list_of_urls:
- try:
- opener = urllib.request.build_opener()
- opener.addheaders = [('User-agent', conf.USER_AGENT)]
- opener.open(url)
- except urllib.error.HTTPError as e:
- # server couldn't fulfill the request
- errors.append((url, e.code, \
- http.server.BaseHTTPRequestHandler.responses[e.code][1]))
- except urllib.error.URLError as e:
- # failed to reach the server
- if type(e.reason) == str:
- errors.append((url, e.reason, e.reason))
- else:
- errors.append((url, e.reason.errno, e.reason.strerror))
- return errors
+ try:
+ opener = urllib.request.build_opener()
+ opener.addheaders = [('User-agent', conf.USER_AGENT)]
+ return (True, opener.open(url))
+ except urllib.error.HTTPError as e:
+ # server couldn't fulfill the request
+ errors.append((url, e.code, \
+ http.server.BaseHTTPRequestHandler.responses[e.code][1]))
+ except urllib.error.URLError as e:
+ # failed to reach the server
+ if type(e.reason) == str:
+ errors.append((url, e.reason, e.reason))
+ else:
+ errors.append((url, e.reason.errno, e.reason.strerror))
+ return (False, errors)
def generate_qr_code(article):
"""
@@ -291,10 +289,13 @@ def search_feed(url):
"""
Search a feed in a HTML page.
"""
- soup = None
+ soup, page = None, None
try:
- req = urllib.request.Request(url, headers={'User-Agent' : conf.USER_AGENT})
- page = urllib.request.urlopen(req)
+ result = open_url(url)
+ if result[0] == True:
+ page = open_url(url)[1]
+ else:
+ return None
soup = BeautifulSoup(page)
except:
return None
bgstack15