path: root/pyaggr3g470r/crawler.py
author    Cédric Bonhomme <cedric@cedricbonhomme.org>    2015-02-22 09:54:29 +0100
committer Cédric Bonhomme <cedric@cedricbonhomme.org>    2015-02-22 09:54:29 +0100
commit    eb73ac5952dc5e83229d58437b50feeeed215cfb (patch)
tree      52498b4cc533a86c3f439b6a4f6f2e958a988d7f /pyaggr3g470r/crawler.py
parent    bug fix... (diff)
Indexation is now restored.
Diffstat (limited to 'pyaggr3g470r/crawler.py')
-rw-r--r--  pyaggr3g470r/crawler.py | 27
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index 85cdbdd0..e813c96e 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -33,6 +33,7 @@ import requests
import feedparser
import dateutil.parser
from datetime import datetime
+from itertools import chain
from bs4 import BeautifulSoup
from sqlalchemy import or_
@@ -40,6 +41,8 @@ from pyaggr3g470r import utils
from pyaggr3g470r import conf
from pyaggr3g470r import db
from pyaggr3g470r.models import User, Article
+if not conf.ON_HEROKU:
+ import pyaggr3g470r.search as fastsearch
logger = logging.getLogger(__name__)
@@ -205,8 +208,18 @@ def insert_database(user, feed):
@asyncio.coroutine
def init_process(user, feed):
+ # Fetch the feed and insert new articles in the database
articles = yield from asyncio.async(insert_database(user, feed))
#print('inserted articles for {}'.format(feed.title))
+
+ # Indexation of the new articles for the feed
+ if not conf.ON_HEROKU and articles != []:
+ try:
+ #print('indexing articles for {}'.format(feed.title))
+ fastsearch.add_to_index(user.id, articles, feed)
+ except:
+ logger.exception("Problem during indexation:")
+
return articles
def retrieve_feed(user, feed_id=None):
@@ -215,7 +228,7 @@ def retrieve_feed(user, feed_id=None):
"""
logger.info("Starting to retrieve feeds.")
- # 1 - Get the list of feeds to fetch
+ # Get the list of feeds to fetch
user = User.query.filter(User.email == user.email).first()
feeds = [feed for feed in user.feeds if feed.enabled]
if feed_id is not None:
@@ -224,20 +237,10 @@ def retrieve_feed(user, feed_id=None):
if feeds == []:
return
- # 2 - Fetch the feeds.
+ # Launch the process for all the feeds
loop = asyncio.get_event_loop()
tasks = [init_process(user, feed) for feed in feeds]
#tasks = [asyncio.async(init_process(user, feed)) for feed in feeds]
loop.run_until_complete(asyncio.wait(tasks))
- """
- # 3 - Indexation
- if not conf.ON_HEROKU:
- self.index(new_articles)
-
- # 4 - Mail notification
- if not conf.ON_HEROKU and conf.NOTIFICATION_ENABLED:
- self.mail_notification(new_articles)
- """
-
logger.info("All articles retrieved. End of the processus.")