aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCédric Bonhomme <cedric@cedricbonhomme.org>2020-03-29 22:48:15 +0200
committerCédric Bonhomme <cedric@cedricbonhomme.org>2020-03-29 22:48:15 +0200
commit4b8e533cd591c67ad47d4e8be37e27fbd51af09a (patch)
tree99005e909567183c2c64c66560e0bebb10d897bd
parentset DEBUG to True also when SQLite is used. (diff)
downloadnewspipe-4b8e533cd591c67ad47d4e8be37e27fbd51af09a.tar.gz
newspipe-4b8e533cd591c67ad47d4e8be37e27fbd51af09a.tar.bz2
newspipe-4b8e533cd591c67ad47d4e8be37e27fbd51af09a.zip
Minor fixes to the crawler.
-rw-r--r--instance/sqlite.py2
-rw-r--r--newspipe/crawler/default_crawler.py8
-rw-r--r--newspipe/lib/article_utils.py4
-rw-r--r--newspipe/lib/feed_utils.py2
-rw-r--r--newspipe/lib/utils.py2
-rw-r--r--newspipe/templates/home.html9
6 files changed, 17 insertions, 10 deletions
diff --git a/instance/sqlite.py b/instance/sqlite.py
index 18b906dd..60047080 100644
--- a/instance/sqlite.py
+++ b/instance/sqlite.py
@@ -42,7 +42,7 @@ CRAWLER_USER_AGENT = "Newspipe (https://git.sr.ht/~cedric/newspipe)"
CRAWLER_TIMEOUT = 30
CRAWLER_RESOLV = False
RESOLVE_ARTICLE_URL = False
-FEED_REFRESH_INTERVAL = 120
+FEED_REFRESH_INTERVAL = 0
# Notification
MAIL_SERVER = "localhost"
diff --git a/newspipe/crawler/default_crawler.py b/newspipe/crawler/default_crawler.py
index b1153582..d76ca4fa 100644
--- a/newspipe/crawler/default_crawler.py
+++ b/newspipe/crawler/default_crawler.py
@@ -61,7 +61,7 @@ async def parse_feed(user, feed):
# with (await sem):
try:
logger.info("Retrieving feed {}".format(feed.link))
- resp = await newspipe_get(feed.link, timeout=5)
+ resp = newspipe_get(feed.link, timeout=5)
except Exception:
logger.info("Problem when reading feed {}".format(feed.link))
return
@@ -117,8 +117,8 @@ async def insert_articles(queue, nb_producers=1):
if item is None:
nb_producers_done += 1
if nb_producers_done == nb_producers:
- print("All producers done.")
- print("Process finished.")
+ logger.info("All producers done.")
+ logger.info("Process finished.")
break
continue
@@ -179,6 +179,8 @@ async def retrieve_feed(queue, users, feed_id=None):
if feed.last_retrieved > (datetime.now() - timedelta(minutes=application.config["FEED_REFRESH_INTERVAL"])):
continue
feeds.append(feed)
+ if feed_id and feed_id == feed.id:
+ break
if feeds == []:
logger.info("No feed to retrieve for {}".format(user.nickname))
diff --git a/newspipe/lib/article_utils.py b/newspipe/lib/article_utils.py
index 00023fd7..0490d4d7 100644
--- a/newspipe/lib/article_utils.py
+++ b/newspipe/lib/article_utils.py
@@ -90,13 +90,13 @@ async def get_article_details(entry, fetch=True):
):
try:
# resolves URL behind proxies (like feedproxy.google.com)
- response = await newspipe_get(article_link, timeout=5)
+ response = newspipe_get(article_link, timeout=5)
except MissingSchema:
split, failed = urlsplit(article_link), False
for scheme in "https", "http":
try:
new_link = urlunsplit(SplitResult(scheme, *split[1:]))
- response = await newspipe_get(new_link, timeout=5)
+ response = newspipe_get(new_link, timeout=5)
except Exception:
failed = True
continue
diff --git a/newspipe/lib/feed_utils.py b/newspipe/lib/feed_utils.py
index 995bfaae..70ded817 100644
--- a/newspipe/lib/feed_utils.py
+++ b/newspipe/lib/feed_utils.py
@@ -55,6 +55,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
except Exception:
logger.exception("failed to retrieve that url")
fp_parsed = {"bozo": True}
+
assert url is not None and fp_parsed is not None
feed = feed or {}
feed_split = urllib.parse.urlsplit(url)
@@ -113,6 +114,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True):
return wrapper
if not feed.get("icon_url"):
+
icons = bs_parsed.find_all(check_keys(rel=["icon", "shortcut"]))
if not len(icons):
icons = bs_parsed.find_all(check_keys(rel=["icon"]))
diff --git a/newspipe/lib/utils.py b/newspipe/lib/utils.py
index 3d6bf0b8..7e6f3cf4 100644
--- a/newspipe/lib/utils.py
+++ b/newspipe/lib/utils.py
@@ -90,7 +90,7 @@ def redirect_url(default="home"):
return request.args.get("next") or request.referrer or url_for(default)
-async def newspipe_get(url, **kwargs):
+def newspipe_get(url, **kwargs):
request_kwargs = {
"verify": False,
"allow_redirects": True,
diff --git a/newspipe/templates/home.html b/newspipe/templates/home.html
index c7340f63..74180bc9 100644
--- a/newspipe/templates/home.html
+++ b/newspipe/templates/home.html
@@ -33,7 +33,7 @@
<span style="background-color: {{ "red" if in_error[fid] > 2 else "orange" }} ;" class="badge pull-right" title="Some errors occured while trying to retrieve that feed.">{{ in_error[fid] }}</span>
{% endif %}
<span id="unread-{{ fid }}" class="badge pull-right">{{ nbunread }}</span>
- <!-- <img src="{{ url_for('icon.icon', url=feeds[fid].url) }}" width="16px"> -->
+ <img src="{{ url_for('icon.icon', url=feeds[fid].icon_url) }}" width="16px">
{{ feeds[fid].title | safe | truncate(25, True) }}
{% if feed_id == fid %}</b>{% endif %}
</a></li>
@@ -52,7 +52,7 @@
<span style="background-color: {{ "red" if in_error[fid] > 2 else "orange" }} ;" class="badge pull-right" title="Some errors occured while trying to retrieve that feed.">{{ in_error[fid] }}</span>
{% endif %}
{% if feed_id == fid %}<b>{% endif %}
- <!-- <img src="{{ url_for('icon.icon', url=feeds[fid].url) }}" width="16px"> -->
+ <img src="{{ url_for('icon.icon', url=feeds[fid].icon_url) }}" width="16px">
{{ feed.title | safe | truncate(25, True) }}
{% if feed_id == fid %}</b>{% endif %}
</a></li>
@@ -126,7 +126,10 @@
{% endif %}
</td>
{% if not feed_id %}
- <td><a href="/article/redirect/{{ article.id}}" target="_blank">{{ article.source.title | safe }}</a></td>
+ <td>
+ <img src="{{ url_for('icon.icon', url=feeds[article.source.id].icon_url) }}" width="16px">
+ <a href="/article/redirect/{{ article.id}}" target="_blank">{{ article.source.title | safe }}</a>
+ </td>
{% endif %}
<td {%if filter_ == 'all' and article.readed == False %}style='font-weight:bold'{% endif %}>
<a href="/article/{{ article.id }}">{{ article.title | safe }}</a>
bgstack15