diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2020-03-29 22:48:15 +0200 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2020-03-29 22:48:15 +0200 |
commit | 4b8e533cd591c67ad47d4e8be37e27fbd51af09a (patch) | |
tree | 99005e909567183c2c64c66560e0bebb10d897bd | |
parent | set DEBUG to True also when SQLite is used. (diff) | |
download | newspipe-4b8e533cd591c67ad47d4e8be37e27fbd51af09a.tar.gz newspipe-4b8e533cd591c67ad47d4e8be37e27fbd51af09a.tar.bz2 newspipe-4b8e533cd591c67ad47d4e8be37e27fbd51af09a.zip |
Minor fixes to the crawler.
-rw-r--r-- | instance/sqlite.py | 2 | ||||
-rw-r--r-- | newspipe/crawler/default_crawler.py | 8 | ||||
-rw-r--r-- | newspipe/lib/article_utils.py | 4 | ||||
-rw-r--r-- | newspipe/lib/feed_utils.py | 2 | ||||
-rw-r--r-- | newspipe/lib/utils.py | 2 | ||||
-rw-r--r-- | newspipe/templates/home.html | 9 |
6 files changed, 17 insertions, 10 deletions
diff --git a/instance/sqlite.py b/instance/sqlite.py index 18b906dd..60047080 100644 --- a/instance/sqlite.py +++ b/instance/sqlite.py @@ -42,7 +42,7 @@ CRAWLER_USER_AGENT = "Newspipe (https://git.sr.ht/~cedric/newspipe)" CRAWLER_TIMEOUT = 30 CRAWLER_RESOLV = False RESOLVE_ARTICLE_URL = False -FEED_REFRESH_INTERVAL = 120 +FEED_REFRESH_INTERVAL = 0 # Notification MAIL_SERVER = "localhost" diff --git a/newspipe/crawler/default_crawler.py b/newspipe/crawler/default_crawler.py index b1153582..d76ca4fa 100644 --- a/newspipe/crawler/default_crawler.py +++ b/newspipe/crawler/default_crawler.py @@ -61,7 +61,7 @@ async def parse_feed(user, feed): # with (await sem): try: logger.info("Retrieving feed {}".format(feed.link)) - resp = await newspipe_get(feed.link, timeout=5) + resp = newspipe_get(feed.link, timeout=5) except Exception: logger.info("Problem when reading feed {}".format(feed.link)) return @@ -117,8 +117,8 @@ async def insert_articles(queue, nḅ_producers=1): if item is None: nb_producers_done += 1 if nb_producers_done == nḅ_producers: - print("All producers done.") - print("Process finished.") + logger.info("All producers done.") + logger.info("Process finished.") break continue @@ -179,6 +179,8 @@ async def retrieve_feed(queue, users, feed_id=None): if feed.last_retrieved > (datetime.now() - timedelta(minutes=application.config["FEED_REFRESH_INTERVAL"])): continue feeds.append(feed) + if feed_id and feed_id == feed.id: + break if feeds == []: logger.info("No feed to retrieve for {}".format(user.nickname)) diff --git a/newspipe/lib/article_utils.py b/newspipe/lib/article_utils.py index 00023fd7..0490d4d7 100644 --- a/newspipe/lib/article_utils.py +++ b/newspipe/lib/article_utils.py @@ -90,13 +90,13 @@ async def get_article_details(entry, fetch=True): ): try: # resolves URL behind proxies (like feedproxy.google.com) - response = await newspipe_get(article_link, timeout=5) + response = newspipe_get(article_link, timeout=5) except MissingSchema: split, failed = urlsplit(article_link), False for scheme in "https", "http": try: new_link = urlunsplit(SplitResult(scheme, *split[1:])) - response = await newspipe_get(new_link, timeout=5) + response = newspipe_get(new_link, timeout=5) except Exception: failed = True continue diff --git a/newspipe/lib/feed_utils.py b/newspipe/lib/feed_utils.py index 995bfaae..70ded817 100644 --- a/newspipe/lib/feed_utils.py +++ b/newspipe/lib/feed_utils.py @@ -55,6 +55,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): except Exception: logger.exception("failed to retrieve that url") fp_parsed = {"bozo": True} + assert url is not None and fp_parsed is not None feed = feed or {} feed_split = urllib.parse.urlsplit(url) @@ -113,6 +114,7 @@ def construct_feed_from(url=None, fp_parsed=None, feed=None, query_site=True): return wrapper if not feed.get("icon_url"): + icons = bs_parsed.find_all(check_keys(rel=["icon", "shortcut"])) if not len(icons): icons = bs_parsed.find_all(check_keys(rel=["icon"])) diff --git a/newspipe/lib/utils.py b/newspipe/lib/utils.py index 3d6bf0b8..7e6f3cf4 100644 --- a/newspipe/lib/utils.py +++ b/newspipe/lib/utils.py @@ -90,7 +90,7 @@ def redirect_url(default="home"): return request.args.get("next") or request.referrer or url_for(default) -async def newspipe_get(url, **kwargs): +def newspipe_get(url, **kwargs): request_kwargs = { "verify": False, "allow_redirects": True, diff --git a/newspipe/templates/home.html b/newspipe/templates/home.html index c7340f63..74180bc9 100644 --- a/newspipe/templates/home.html +++ b/newspipe/templates/home.html @@ -33,7 +33,7 @@ <span style="background-color: {{ "red" if in_error[fid] > 2 else "orange" }} ;" class="badge pull-right" title="Some errors occured while trying to retrieve that feed.">{{ in_error[fid] }}</span> {% endif %} <span id="unread-{{ fid }}" class="badge pull-right">{{ nbunread }}</span> - <!-- <img src="{{ url_for('icon.icon', url=feeds[fid].url) }}" width="16px"> --> + <img src="{{ url_for('icon.icon', url=feeds[fid].icon_url) }}" width="16px"> {{ feeds[fid].title | safe | truncate(25, True) }} {% if feed_id == fid %}</b>{% endif %} </a></li> @@ -52,7 +52,7 @@ <span style="background-color: {{ "red" if in_error[fid] > 2 else "orange" }} ;" class="badge pull-right" title="Some errors occured while trying to retrieve that feed.">{{ in_error[fid] }}</span> {% endif %} {% if feed_id == fid %}<b>{% endif %} - <!-- <img src="{{ url_for('icon.icon', url=feeds[fid].url) }}" width="16px"> --> + <img src="{{ url_for('icon.icon', url=feeds[fid].icon_url) }}" width="16px"> {{ feed.title | safe | truncate(25, True) }} {% if feed_id == fid %}</b>{% endif %} </a></li> @@ -126,7 +126,10 @@ {% endif %} </td> {% if not feed_id %} - <td><a href="/article/redirect/{{ article.id}}" target="_blank">{{ article.source.title | safe }}</a></td> + <td> + <img src="{{ url_for('icon.icon', url=feeds[article.source.id].icon_url) }}" width="16px"> + <a href="/article/redirect/{{ article.id}}" target="_blank">{{ article.source.title | safe }}</a> + </td> {% endif %} <td {%if filter_ == 'all' and article.readed == False %}style='font-weight:bold'{% endif %}> <a href="/article/{{ article.id }}">{{ article.title | safe }}</a> |