From 901fbd154f16268ca4c9d10af8d038d684c8c4f4 Mon Sep 17 00:00:00 2001
From: cedricbonhomme <devnull@localhost>
Date: Thu, 8 Nov 2012 23:08:35 +0100
Subject: Porting to Python 3.2. Better, faster, stronger.

---
 .hgsubstate             |  2 +-
 source/auth.py          |  6 +++---
 source/cfg/cherrypy.cfg |  2 +-
 source/conf.py          |  4 ++--
 source/export.py        |  6 +++---
 source/feedgetter.py    | 10 +++++-----
 source/mongodb.py       |  8 ++++----
 source/pyAggr3g470r.py  | 23 ++++++++++++-----------
 source/utils.py         | 38 +++++++++++++++++++-------------------
 9 files changed, 50 insertions(+), 49 deletions(-)

diff --git a/.hgsubstate b/.hgsubstate
index 3dab1403..9a2f0dbe 100644
--- a/.hgsubstate
+++ b/.hgsubstate
@@ -1 +1 @@
-95fa18e08f277df2f4d03da06e0fe8c7c8cd4e8e source/qrcode
+43105ca671ddb7b856866a4e532dc2f21aa13290 source/qrcode
diff --git a/source/auth.py b/source/auth.py
index 5e3243f4..5a6b5de6 100755
--- a/source/auth.py
+++ b/source/auth.py
@@ -62,11 +62,11 @@ def check_credentials(username, password):
         USERS[row[0]] = row[1]
 
     m = hashlib.sha1()
-    m.update(password)
-    if username in USERS.keys() and USERS[username] == m.hexdigest():
+    m.update(password.encode())
+    if username in list(USERS.keys()) and USERS[username] == m.hexdigest():
         return None
     else:
-        return u"Incorrect username or password."
+        return "Incorrect username or password."
 
     # An example implementation which uses an ORM could be:
     # u = User.get(username)
diff --git a/source/cfg/cherrypy.cfg b/source/cfg/cherrypy.cfg
index a419504d..6e5374fa 100644
--- a/source/cfg/cherrypy.cfg
+++ b/source/cfg/cherrypy.cfg
@@ -21,4 +21,4 @@ tools.staticdir.match = "(?i)^.+\.css$"
 [/images]
 tools.staticdir.on = True
 tools.staticdir.dir = "img"
-tools.staticdir.match = "(?i)^.+\.png$"
\ No newline at end of file
+tools.staticdir.match = "(?i)^.+\.png$"
diff --git a/source/conf.py b/source/conf.py
index b45b1d51..3f08efcf 100644
--- a/source/conf.py
+++ b/source/conf.py
@@ -28,9 +28,9 @@ __license__ = "GPLv3"
 
 
 import os
-import ConfigParser
+import configparser
 # load the configuration
-config = ConfigParser.SafeConfigParser()
+config = configparser.SafeConfigParser()
 try:
     config.read("./cfg/pyAggr3g470r.cfg")
 except:
diff --git a/source/export.py b/source/export.py
index 8380682a..f2fbd729 100644
--- a/source/export.py
+++ b/source/export.py
@@ -160,8 +160,8 @@ def export_pdf(feeds):
     Export the articles given in parameter in PDF files.
     """
     from xhtml2pdf import pisa
-    import cStringIO as StringIO
-    for feed in feeds.values():
+    import io as StringIO
+    for feed in list(feeds.values()):
             # creates folder for each stream
             folder = utils.path + "/var/export/pdf/" + \
                     utils.normalize_filename(feed.feed_title.strip().replace(':', '').lower())
@@ -171,7 +171,7 @@ def export_pdf(feeds):
                 # directories already exists (not a problem)
                 pass
 
-            for article in feed.articles.values():
+            for article in list(feed.articles.values()):
                 name = article.article_date.strip().replace(' ', '_')
                 name = os.path.normpath(folder + "/" + name + ".pdf")
                 
diff --git a/source/feedgetter.py b/source/feedgetter.py
index 2836656c..33085c14 100755
--- a/source/feedgetter.py
+++ b/source/feedgetter.py
@@ -29,7 +29,7 @@ __license__ = "GPLv3"
 import hashlib
 import threading
 import feedparser
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
 
 from datetime import datetime
 
@@ -105,9 +105,9 @@ class FeedGetter(object):
         collection_dic = {"feed_id": feed_id, \
                             "type": 0, \
                             "feed_image": feed_image, \
-                            "feed_title": utils.clear_string(a_feed.feed.title.encode('utf-8')), \
+                            "feed_title": utils.clear_string(a_feed.feed.title), \
                             "feed_link": feed_link, \
-                            "site_link": a_feed.feed.link.encode('utf-8'), \
+                            "site_link": a_feed.feed.link, \
                             "mail": False \
                         }
 
@@ -140,7 +140,7 @@ class FeedGetter(object):
             article = {"article_id": article_id, \
                     "type":1, \
                     "article_date": post_date, \
-                    "article_link": article.link.encode('utf-8'), \
+                    "article_link": article.link, \
                     "article_title": article_title, \
                     "article_content": description, \
                     "article_readed": False, \
@@ -153,7 +153,7 @@ class FeedGetter(object):
 
         # send new articles by e-mail if desired.
         #threading.Thread(None, utils.send_mail, None, (conf.mail_from, conf.mail_to, \
-                            #a_feed.feed.title.encode('utf-8'), \
+                            #a_feed.feed.title, \
                             #article_title, description) \
                         #).start()
 
diff --git a/source/mongodb.py b/source/mongodb.py
index e83b7324..68ccf5bc 100644
--- a/source/mongodb.py
+++ b/source/mongodb.py
@@ -78,7 +78,7 @@ class Articles(object):
         """
         Return information about a feed.
         """
-        return self.db[str(feed_id)].find().next()
+        return next(self.db[str(feed_id)].find())
 
     def get_all_feeds(self, condition=None):
         """
@@ -93,7 +93,7 @@ class Articles(object):
                 else:
                     cursor = self.db[collection_name].find({"type":0, condition[0]:condition[1]})
                 if cursor.count() != 0:
-                    feeds.append(cursor.next())
+                    feeds.append(next(cursor))
         feeds.sort(key = lambda elem: elem['feed_title'].lower())
         return feeds
 
@@ -114,7 +114,7 @@ class Articles(object):
         Get an article of a specified feed.
         """
         collection = self.db[str(feed_id)]
-        return collection.find({"article_id":article_id}).next()
+        return next(collection.find({"article_id":article_id}))
 
     def get_articles_from_collection(self, feed_id, condition=None, limit=1000000000):
         """
@@ -270,7 +270,7 @@ if __name__ == "__main__":
 
     #articles.add_articles([article_dic1, article_dic2], 42)
 
-    print "All articles:"
+    print("All articles:")
     #print articles.get_all_articles()
 
 
diff --git a/source/pyAggr3g470r.py b/source/pyAggr3g470r.py
index 83834853..8d092fcb 100755
--- a/source/pyAggr3g470r.py
+++ b/source/pyAggr3g470r.py
@@ -52,8 +52,8 @@ import export
 import mongodb
 import feedgetter
 from auth import AuthController, require, member_of, name_is
-from qrcode.pyqrnative.PyQRNative import QRCode, QRErrorCorrectLevel, CodeOverflowException
-from qrcode import qr
+#from qrcode.pyqrnative.PyQRNative import QRCode, QRErrorCorrectLevel, CodeOverflowException
+#from qrcode import qr
 
 
 def error_page_404(status, message, traceback, version):
@@ -193,7 +193,7 @@ class pyAggr3g470r(object):
 
                 # Descrition for the CSS ToolTips
                 article_content = utils.clear_string(article["article_content"])
-                if article_content:
+                if False:
                     description = " ".join(article_content.split(' ')[:55])
                 else:
                     description = "No description."
@@ -407,7 +407,7 @@ class pyAggr3g470r(object):
 
                         # descrition for the CSS ToolTips
                         article_content = utils.clear_string(article["article_content"])
-                        if article_content:
+                        if False:
                             description = " ".join(article_content[:500].split(' ')[:-1])
                         else:
                             description = "No description."
@@ -481,7 +481,7 @@ class pyAggr3g470r(object):
             html += description + "\n<br /><br /><br />"
         else:
             html += "No description available.\n<br /><br /><br />"
-
+        """
         # Generation of the QR Code for the current article
         try:
             os.makedirs("./var/qrcode/")
@@ -496,19 +496,20 @@ class pyAggr3g470r(object):
                 f = qr.QRUrl(url = "URL too long.")
                 f.make()
             f.save("./var/qrcode/"+article_id+".png")
+        """
 
         # Previous and following articles
         previous, following = None, None
         liste = self.mongo.get_articles_from_collection(feed_id)
         for current_article in self.mongo.get_articles_from_collection(feed_id):
-            articles.next()
+            next(articles)
             if current_article["article_id"] == article_id:
                 break
             following = current_article
         if following is None:
             following = liste[liste.count()-1]
         try:
-            previous = articles.next()
+            previous = next(articles)
         except StopIteration:
             previous = liste[0]
 
@@ -926,7 +927,7 @@ class pyAggr3g470r(object):
             query_string = "year:" + the_year + "-month"
         if "month" not in query:
             html += '<div style="width: 35%; overflow:hidden; text-align: justify">' + \
-                        utils.tag_cloud([(elem, timeline[elem]) for elem in timeline.keys()], query_string) + '</div>'
+                        utils.tag_cloud([(elem, timeline[elem]) for elem in list(timeline.keys())], query_string) + '</div>'
         html += '<br /><br /><h1>Search with a month+year picker</h1>\n'
         html += '<form>\n\t<input name="m" type="month">\n\t<input type="submit" value="Go">\n</form>'
         html += '<hr />'
@@ -1217,8 +1218,8 @@ class pyAggr3g470r(object):
         getattr(export, export_method)(self.mongo)
         try:
             getattr(export, export_method)(self.mongo)
-        except Exception, e:
-            print e
+        except Exception as e:
+            print(e)
             return self.error_page(e)
         return self.management()
 
@@ -1231,7 +1232,7 @@ class pyAggr3g470r(object):
         """
         try:
             from epub import ez_epub
-        except Exception, e:
+        except Exception as e:
             return self.error_page(e)
         try:
             feed_id, article_id = param.split(':')
diff --git a/source/utils.py b/source/utils.py
index 7d1eaecc..da26d022 100755
--- a/source/utils.py
+++ b/source/utils.py
@@ -37,18 +37,18 @@ __license__ = "GPLv3"
 import os
 import re
 import operator
-import urlparse
+import urllib.parse
 import calendar
 import unicodedata
-import htmlentitydefs
+import html.entities
 
 import smtplib
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 
-import urllib2
-import BaseHTTPServer
-from BeautifulSoup import BeautifulSoup
+import urllib.request, urllib.error, urllib.parse
+import http.server
+from bs4 import BeautifulSoup
 
 from datetime import datetime
 from collections import Counter
@@ -70,14 +70,14 @@ def detect_url_errors(list_of_urls):
     """
     errors = []
     for url in list_of_urls:
-        req = urllib2.Request(url)
+        req = urllib.request.Request(url)
         try:
-            urllib2.urlopen(req)
-        except urllib2.HTTPError, e:
+            urllib.request.urlopen(req)
+        except urllib.error.HTTPError as e:
             # server couldn't fulfill the request
             errors.append((url, e.code, \
-                BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code][1]))
-        except urllib2.URLError, e:
+                http.server.BaseHTTPRequestHandler.responses[e.code][1]))
+        except urllib.error.URLError as e:
             # failed to reach the server
             errors.append((url, e.reason.errno ,e.reason.strerror))
     return errors
@@ -87,9 +87,9 @@ def clear_string(data):
     Clear a string by removing HTML tags, HTML special caracters
     and consecutive white spaces (more that one).
     """
-    p = re.compile(r'<[^<]*?/?>') # HTML tags
-    q = re.compile(r'\s') # consecutive white spaces
-    return p.sub('', q.sub(' ', data))
+    p = re.compile(b'<[^<]*?/?>') # HTML tags
+    q = re.compile(b'\s') # consecutive white spaces
+    return p.sub(b'', q.sub(b' ', bytes(data, "utf-8"))).decode("utf-8", "strict")
 
 def unescape(text):
     """
@@ -101,15 +101,15 @@ def unescape(text):
             # character reference
             try:
                 if text[:3] == "&#x":
-                    return unichr(int(text[3:-1], 16))
+                    return chr(int(text[3:-1], 16))
                 else:
-                    return unichr(int(text[2:-1]))
+                    return chr(int(text[2:-1]))
             except ValueError:
                 pass
         else:
             # named entity
             try:
-                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+                text = chr(html.entities.name2codepoint[text[1:-1]])
             except KeyError:
                 pass
         return text # leave as is
@@ -244,7 +244,7 @@ def change_feed_url(old_feed_url, new_feed_url):
     # Replace the URL in the text file
     with open("./var/feed.lst", "r") as f:
         lines = f.readlines()
-    lines = map(str.strip, lines)
+    lines = list(map(str.strip, lines))
     try:
         lines[lines.index(old_feed_url)] = new_feed_url
     except:
@@ -271,7 +271,7 @@ def search_feed(url):
     """
     soup = None
     try:
-        page = urllib2.urlopen(url)
+        page = urllib.request.urlopen(url)
         soup = BeautifulSoup(page)
     except:
         return None
@@ -279,6 +279,6 @@ def search_feed(url):
     feed_links.extend(soup('link', type='application/rss+xml'))
     for feed_link in feed_links:
         if url not in feed_link['href']:
-            return urlparse.urljoin(url, feed_link['href'])
+            return urllib.parse.urljoin(url, feed_link['href'])
         return feed_link['href']
     return None
-- 
cgit