From 05286186d3f24e01b5bb3cc4a9ae4ad3de345578 Mon Sep 17 00:00:00 2001
From: cedricbonhomme
Date: Thu, 2 Sep 2010 16:16:19 +0200
Subject: Added detect_url_errors() function.

---
 utils.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/utils.py b/utils.py
index 4b7104d1..7e7edfde 100755
--- a/utils.py
+++ b/utils.py
@@ -20,6 +20,7 @@ import smtplib
 from email.mime.text import MIMEText
 
 import urllib2
+import BaseHTTPServer
 from BeautifulSoup import BeautifulSoup
 
 from datetime import datetime
@@ -58,6 +59,35 @@ url_finders = [ \
     re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+"), \
 ]
 
+def detect_url_errors(list_of_urls):
+    """
+    Detect URL errors.
+
+    Try to open each URL of 'list_of_urls' and return a list of
+    (url, code, message) tuples, one for each URL which failed:
+    the HTTP status code and its description when the server answered
+    with an error, or the errno and its description (when available)
+    when the server could not be reached.
+    """
+    responses = BaseHTTPServer.BaseHTTPRequestHandler.responses
+    errors = []
+    for url in list_of_urls:
+        req = urllib2.Request(url)
+        try:
+            # only the outcome matters: close the response right away
+            urllib2.urlopen(req).close()
+        except urllib2.HTTPError, e:
+            # server couldn't fulfill the request
+            # (fall back on the server's own message for unknown codes)
+            errors.append((url, e.code, \
+                responses.get(e.code, (e.msg, e.msg))[1]))
+        except urllib2.URLError, e:
+            # failed to reach the server
+            # ('reason' may be a plain string without errno/strerror)
+            errors.append((url, getattr(e.reason, 'errno', None), \
+                getattr(e.reason, 'strerror', None) or str(e.reason)))
+    return errors
+
 def detect_language(text):
     """
     Detect the language of a text.
-- 
cgit