From c42aa9a1e2ec1a9afc0f6a3fbbc402a4d05ec4f7 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Sun, 11 Aug 2013 14:13:31 +0200 Subject: It is now possible to fetch articles with feedparser through an HTTP proxy, for example privoxy/tor. The address of the proxy is specified in the configuration file. --- source/cfg/pyAggr3g470r.cfg-sample | 1 + source/conf.py | 1 + source/feedgetter.py | 11 ++++++++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/source/cfg/pyAggr3g470r.cfg-sample b/source/cfg/pyAggr3g470r.cfg-sample index bc314ebe..97993533 100755 --- a/source/cfg/pyAggr3g470r.cfg-sample +++ b/source/cfg/pyAggr3g470r.cfg-sample @@ -14,3 +14,4 @@ password = your_password [misc] diaspora_pod = joindiaspora.com feed_list = ./var/feed.lst +http_proxy = 127.0.0.1:8118 diff --git a/source/conf.py b/source/conf.py index 0dc42170..98aa0d48 100644 --- a/source/conf.py +++ b/source/conf.py @@ -52,3 +52,4 @@ password = config.get('mail','password') DIASPORA_POD = config.get('misc', 'diaspora_pod') FEED_LIST = config.get('misc', 'feed_list') +HTTP_PROXY = config.get('misc', 'http_proxy') diff --git a/source/feedgetter.py b/source/feedgetter.py index 44fd7daa..cec74938 100755 --- a/source/feedgetter.py +++ b/source/feedgetter.py @@ -20,14 +20,15 @@ # along with this program. If not, see __author__ = "Cedric Bonhomme" -__version__ = "$Revision: 1.6 $" +__version__ = "$Revision: 1.7 $" __date__ = "$Date: 2010/09/02 $" -__revision__ = "$Date: 2013/06/10 $" +__revision__ = "$Date: 2013/08/11 $" __copyright__ = "Copyright (c) Cedric Bonhomme" __license__ = "GPLv3" import hashlib import threading +import urllib.request import feedparser from bs4 import BeautifulSoup from datetime import datetime @@ -109,7 +110,11 @@ class FeedGetter(object): """ Add the articles of the feed 'a_feed' in the SQLite base. 
""" - a_feed = feedparser.parse(feed_link) + if conf.HTTP_PROXY == "": + proxy = urllib.request.ProxyHandler({}) + else: + proxy = urllib.request.ProxyHandler({"http":conf.HTTP_PROXY}) + a_feed = feedparser.parse(feed_link, handlers = [proxy]) if a_feed['entries'] == []: return try: -- cgit