about summary refs log tree commit diff
path: root/pyaggr3g470r/lib
diff options
context:
space:
mode:
author: François Schmidts <francois.schmidts@gmail.com> 2015-07-07 14:42:26 +0200
committer: François Schmidts <francois.schmidts@gmail.com> 2015-07-07 14:42:26 +0200
commit: 9c9e1058c588a3e2f80e35c5dd95bac234e597f4 (patch)
tree: 12b8cc24e0ef68c95c6950e306c4494ed3c1f273 /pyaggr3g470r/lib
parent: making the crawler getting the feed with high traffic earlier (diff)
download: newspipe-9c9e1058c588a3e2f80e35c5dd95bac234e597f4.tar.gz
newspipe-9c9e1058c588a3e2f80e35c5dd95bac234e597f4.tar.bz2
newspipe-9c9e1058c588a3e2f80e35c5dd95bac234e597f4.zip
rebuilding feed url as well
Diffstat (limited to 'pyaggr3g470r/lib')
-rw-r--r-- pyaggr3g470r/lib/feed_utils.py | 7
-rw-r--r-- pyaggr3g470r/lib/utils.py | 2
2 files changed, 6 insertions, 3 deletions
diff --git a/pyaggr3g470r/lib/feed_utils.py b/pyaggr3g470r/lib/feed_utils.py
index a7149d79..367fd4b5 100644
--- a/pyaggr3g470r/lib/feed_utils.py
+++ b/pyaggr3g470r/lib/feed_utils.py
@@ -23,6 +23,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
assert url is not None and fp_parsed is not None
feed = feed or {}
feed_split = urllib.parse.urlsplit(url)
+ site_split = None
if not fp_parsed['bozo']:
feed['link'] = url
feed['site_link'] = try_keys(fp_parsed['feed'], 'href', 'link')
@@ -82,8 +83,8 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
del feed['icon']
if not feed.get('link'):
- alternate = bs_parsed.find_all(check_keys(rel=['alternate'],
+ alternates = bs_parsed.find_all(check_keys(rel=['alternate'],
type=['application/rss+xml']))
- if len(alternate) >= 1:
- feed['link'] = alternate[0].attrs['href']
+ if len(alternates) >= 1:
+ feed['link'] = rebuild_url(alternates[0].attrs['href'], feed_split)
return feed
diff --git a/pyaggr3g470r/lib/utils.py b/pyaggr3g470r/lib/utils.py
index 280256f6..62284de1 100644
--- a/pyaggr3g470r/lib/utils.py
+++ b/pyaggr3g470r/lib/utils.py
@@ -42,6 +42,8 @@ def rebuild_url(url, base_split):
def try_splits(url, *splits):
for split in splits:
+ if split is None:
+ continue
rb_url = rebuild_url(url, split)
response = requests.get(rb_url, verify=False, timeout=10)
if response.ok and 'html' not in response.headers['content-type']:
bgstack15