aboutsummaryrefslogtreecommitdiff
path: root/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils.py')
-rwxr-xr-x utils.py 19
1 files changed, 19 insertions, 0 deletions
diff --git a/utils.py b/utils.py
index 4b7104d1..7e7edfde 100755
--- a/utils.py
+++ b/utils.py
@@ -20,6 +20,7 @@ import smtplib
from email.mime.text import MIMEText
import urllib2
+import BaseHTTPServer
from BeautifulSoup import BeautifulSoup
from datetime import datetime
@@ -58,6 +59,24 @@ url_finders = [ \
re.compile("'\\<((mailto:)|)[-A-Za-z0-9\\.]+@[-A-Za-z0-9\\.]+"), \
]
def detect_url_errors(list_of_urls):
    """
    Detect URL errors.

    Try to open every URL of 'list_of_urls' and return a list of
    (url, code, message) tuples, one for each URL that could not be
    opened:
      - HTTP error: (url, HTTP status code, description of the status);
      - connection error: (url, errno, message) of the underlying
        failure; errno is None when the failure carries no error number.
    URLs opened successfully are not reported.
    """
    http_responses = BaseHTTPServer.BaseHTTPRequestHandler.responses
    errors = []
    for url in list_of_urls:
        req = urllib2.Request(url)
        try:
            response = urllib2.urlopen(req)
        except urllib2.HTTPError as e:
            # server couldn't fulfill the request
            if e.code in http_responses:
                message = http_responses[e.code][1]
            else:
                # status code missing from the table: fall back on the
                # reason phrase carried by the error instead of raising
                # a KeyError
                message = e.msg
            errors.append((url, e.code, message))
        except urllib2.URLError as e:
            # failed to reach the server
            # e.reason is either an exception (socket error, ...) or a
            # plain string (e.g. unknown URL type), so errno/strerror
            # are not always available
            reason = e.reason
            code = getattr(reason, 'errno', None)
            message = getattr(reason, 'strerror', None) or str(reason)
            errors.append((url, code, message))
        else:
            # only the status matters: release the connection right away
            response.close()
    return errors

+
def detect_language(text):
"""
Detect the language of a text.
bgstack15