diff options
author | cedricbonhomme <devnull@localhost> | 2010-09-02 16:16:19 +0200 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2010-09-02 16:16:19 +0200 |
commit | 05286186d3f24e01b5bb3cc4a9ae4ad3de345578 (patch) | |
tree | baef64d78ad11ae076e628fa0dabd936a102029b /utils.py | |
parent | Minor bug fix in export function. (diff) | |
download | newspipe-05286186d3f24e01b5bb3cc4a9ae4ad3de345578.tar.gz newspipe-05286186d3f24e01b5bb3cc4a9ae4ad3de345578.tar.bz2 newspipe-05286186d3f24e01b5bb3cc4a9ae4ad3de345578.zip |
Added detect_url_errors() function.
Diffstat (limited to 'utils.py')
-rwxr-xr-x | utils.py | 19 |
1 file changed, 19 insertions, 0 deletions
@@ -20,6 +20,7 @@ import smtplib
 from email.mime.text import MIMEText
 
 import urllib2
+import BaseHTTPServer
 from BeautifulSoup import BeautifulSoup
 
 from datetime import datetime
@@ -58,6 +59,24 @@ url_finders = [ \
     re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+"), \
 ]
 
+def detect_url_errors(list_of_urls):
+    """
+    Detect URL errors.
+    """
+    errors = []
+    for url in list_of_urls:
+        req = urllib2.Request(url)
+        try:
+            urllib2.urlopen(req)
+        except urllib2.HTTPError, e:
+            # server couldn't fulfill the request
+            errors.append((url, e.code, \
+                BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code][1]))
+        except urllib2.URLError, e:
+            # failed to reach the server
+            errors.append((url, e.reason.errno ,e.reason.strerror))
+    return errors
+
 def detect_language(text):
     """
     Detect the language of a text.