From 4a8438d7f2b7b16941240b91f39a9402c431ffc2 Mon Sep 17 00:00:00 2001
From: François Schmidts <francois.schmidts@gmail.com>
Date: Tue, 2 Feb 2016 23:30:57 +0100
Subject: writing a bit of doc, moving crawler together

---
 AUTHORS.rst                    |   6 +-
 CHANGELOG.md                   | 512 ----------------------------------------
 CHANGELOG.rst                  | 518 +++++++++++++++++++++++++++++++++++++++++
 README.rst                     |  31 ++-
 src/crawler.py                 | 168 -------------
 src/crawler/classic_crawler.py | 168 +++++++++++++
 src/crawler/http_crawler.py    | 251 ++++++++++++++++++++
 src/manager.py                 |   6 +-
 src/web/lib/crawler.py         | 251 --------------------
 src/web/utils.py               |   4 +-
 10 files changed, 959 insertions(+), 956 deletions(-)
 delete mode 100644 CHANGELOG.md
 create mode 100644 CHANGELOG.rst
 delete mode 100644 src/crawler.py
 create mode 100644 src/crawler/classic_crawler.py
 create mode 100644 src/crawler/http_crawler.py
 delete mode 100644 src/web/lib/crawler.py

diff --git a/AUTHORS.rst b/AUTHORS.rst
index 5a6f2cc0..dfac5267 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -1,6 +1,8 @@
-pyAggr3g470r is a free software written and maintained
+Original author
+---------------
+JARR is a free software written and maintained
 by Cédric Bonhomme https://www.cedricbonhomme.org
 
 Contributors
-````````````
+------------
 - François Schmidts http://1pxsolidblack.pl/
diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index 1f3edea2..00000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,512 +0,0 @@
-=================
-Release History
-=================
-
-6.7 (2015-07-21)
-----------------
-    New:
-     * a filter mechanism for feeds has been added (PR #14);
-     * icon of feeds is now an url retrieved from the feed or the site link (PR #15).
-    Improvements:
-     * improvements for the bookmarklet (PR #16 and PR #18);
-     * performance improvements (display of the /feed page);
-     * enhancements for the two crawlers;
-     * enhancements of the UI (PR #14);
-     * misc changes to the models and controllers.
-
-6.6 (2015-06-02)
-----------------
-    New:
-     * it is now possible to sort the list of articles by feed title or
-     article title;
-     * added base unittests.
-    Improvements:
-     * fixed some minor bugs;
-     * improved the asyncio crawler;
-     * automatically use the good Python executable for the asyncio crawler;
-     * improved controllers (enforcing better use of user_id);
-     * the search is now case insensitive.
-
-6.5.5 (2015-04-22)
-------------------
-    The full text search powered by Whoosh has been removed.
-
-6.5.4 (2015-04-16)
-------------------
-    This release introduces a new config module and a new search functionality.
-    The result of a search is integrated in the main page.
-
-6.5.3 (2015-04-14)
-------------------
-    The fetch call is now dependent to the selected crawling method.
-
-6.5.2 (2015-04-14)
-------------------
-    The look and feel has been globally improved.
-    It is now possible to add a new feed from any page via a dropdown menu.
-
-6.5.1 (2015-04-08)
-------------------
-    Improvements:
-     * improvements on the controllers;
-     * the minimum error count is now specified in the configuration file.
-
-6.5 (2015-04-07)
-----------------
-    Improvements:
-     * new CSS;
-     * improved installation script;
-     * it is now possible to delete all duplicate articles with one HTTP
-     delete request.
-
-6.4 (2015-03-17)
-----------------
-    New:
-     * a new page 'history' to explore your database of articles.
-    Changes:
-     * updated documentation;
-     * minor improvements.
-    Fixes:
-     * changed the type of the column 'last_modified' to string.
-
-6.3 (2015-03-08)
-----------------
-    New:
-    * a new architecture with base for controllers;
-    * new, more complete RESTful API;
-    * a crawler handling errors and cache control;
-    * the new crawler takes advantage of the RESTful API
-    (can be run on the client side).
-
-6.2 (2015-02-26)
-----------------
-    The system of email notifications for new articles has been removed.
-    This feature was hardly used.
-
-6.1 (2015-02-23)
-----------------
-    Improvements: articles are now identified with the id provided
-    by the RSS/ATOM feed.
-    Prevent BeautifulSoup4 from adding extra '<html><body>' tags to
-    the soup with the 'lxml' parser.
-    Indexation is now used with the new crawler.
-    The documentation has been updated.
-
-6.0 (2015-02-08)
-----------------
-    New: pyAggr3g470r is now working with Python 3.4. Tested on Heroku
-    with Python 3.4.2.
-    Improvements: The feed crawler uses the PEP 3156 (asyncio). The action
-    buttons are now on the left. It is easier to mark an article as read.
-
-5.7.0 (2014-11-20)
-------------------
-    Improvements: major improvements of the email notifications system.
-    Notifications are now sent through Postmark (for example for Heroku)
-    or a standard SMTP server.
-    Bug fix: activation key was too long for the database column.
-
-5.6.6 (2014-09-24)
-------------------
-    Improvements: Significant performance improvement for the views
-    /favorites and /unread. The performance problem has been introduced
-    with the release 5.6.5.
-
-5.6.5 (2014-09-15)
-------------------
-    This release fixes a major bug introduced with the version 0.9.7 of SQLAlchemy
-    (conflicts between persistent ant transcient SQLAlchemy objects).
-
-5.6.4 (2014-09-12)
-------------------
-    Translations have been updated.
-    Gravatar extension has been removed.
-    Minor fix.
-
-5.6.3 (2014-08-27)
-------------------
-    News: It is now possible to delete articles older than a given number
-    of weeks. This can be done via the management page.
-    A new environment variable enables to activate or deactivate the
-    self-registration.
-    Changes: translations were updated. Some minor bugfixes. Improved
-    deployment instructions in the README.
-
-5.6.2 (2014-08-10)
-------------------
-    Minor improvements: Articles are automatically retrieved after the import
-    of an OPML file.
-    When displaying all articles (unread + read), titles of unread articles
-    are emphasized in bold.
-    A new tab is opened when clicking on the title of an article.
-    New: pyAggr3g470r can be deployed with the Heroku auto deploy button.
-
-5.6.1 (2014-07-13)
-------------------
-    Performance improvements: faster database insertion of articles by
-    the crawler and loading of the management page.
-    Minor bug fixes.
-
-5.6 (2014-07-05)
-----------------
-    pyAggr3g470r has now a RESTful JSON API which enables to manage Feed and
-    Article objects. The API is documented in the README file.
-    The main page is using a subset of this API with jQuery HTTP requests.
-
-5.5 (2014-06-14)
-----------------
-    This release introduces a redesigned homepage which loads much faster and
-    is easier to read. pyAggr3g470r can now be run by Apache.
-    Adding a feed no longer requires a title and a site link.
-
-5.4 (2014-05-28)
-----------------
-    This version makes it possible for a user to export all of their feeds and
-    articles as a JSON file for later restoration.
-
-5.3 (2014-05-23)
-----------------
-    This release introduces some UI improvements, especially for the home page.
-
-5.2 (2014-05-16)
-----------------
-    This release adds minor bug fixes and UI improvements.
-
-5.1 (2014-05-13)
-----------------
-    When deployed on Heroku, the platform now uses the Postmark service to
-    send account confirmation emails to users. It is no longer required to
-    enter a first name and a last name to create an account.
-
-5.0 (2014-05-04)
-----------------
-    pyAggr3g470r is now translated into English and French. Improvements
-    concerning the news retriever and the Whoosh search functionality have
-    been made. The user can now export all articles in JSON format.
-    The user of the platform now has the possibility to delete his or her
-    account.
-
-4.9 (2014-04-24)
-----------------
-    This version introduces minor improvements to the Jinja templates and
-    bugfixes (relating to the import of OPML files with sub-categories and
-    relating to the Whoosh index generation).
-
-4.8 (2014-04-13)
-----------------
-    Feeds are now retrieved in a separated process with the Gevent library.
-    This offers the best performance on Heroku.
-
-4.7 (2014-04-12)
-----------------
-    pyAggr3g470r can now be deployed on Heroku or on a traditional server.
-    Moreover, several users can use an instance of pyAggr3g470r. A platform is
-    managed by the administrator, a user with specific rights.
-
-4.6 (2014-02-09)
-----------------
-    This release introduces the import of OPML files of subscriptions.
-    Minor improvements have been made to the templates.
-
-4.5 (2014-01-29)
-----------------
-    This release introduces a one step installation process with a simple
-    script. Minor improvements to the feedgetter module have been introduced
-    (the feed description is now stored in the database). Miscellaneous
-    improvements to the Jinja templates. Finally, more configuration options
-    are now offered to the user.
-
-4.4 (2013-12-27)
-----------------
-    This version introduces some improvements for the feedgetter module
-    including automatic retrieval of the URL behind feedproxy.google.com,
-    and support for configuring the user agent and proxy. Minor improvements
-    were made to the MongoEngine models. Notifications are displayed with
-    Flask flash messages.
-
-4.3 (2013-12-03)
-----------------
-    With this release, the user is able to update her personal information.
-    It is now possible to enable/disable the checking of updates for a feed.
-    Some performance improvements and user interface optimizations have been
-    done.
-
-4.2 (2013-11-10)
-----------------
-    This is the first release of the new version of pyAggr3g470r.
-    The code has been rewritten with the Flask microframework and the
-    Bootstrap frontend framework.
-
-4.1 (2013-08-11)
-----------------
-    HTTP proxy support has been added for the fetching of feeds. This is
-    useful, for example, if you are using Privoxy/Tor.
-
-4.0 (2013-06-25)
-----------------
-    Searching of articles is now achieved with Whoosh, a fast full-text
-    indexing and searching library.
-
-3.9 (2013-04-14)
-----------------
-    The code has been tested and ported to Python 3.3.1. Some minor bugs have
-    been fixed, with a lot of improvements concerning the Mako templates,
-    MongoDB database management, and management of exceptions.
-
-3.8 (2013-01-12)
-----------------
-    This release introduces a reworked management page; it is now possible to
-    change the username and password via this page.
-    Some improvements concerning the HTML export of the database have been
-    made. Finally, indexed MongoDB full text searching provides a much faster
-    search.
-
-3.7 (2012-12-29)
-----------------
-    pyAggr3g470r is now using the Mako template library.
-    Performance improvements for the MongoDB database have been made, and some
-    minor bugfixes. Stop words (a, of, the, an, for...) are now ignored when
-    calculating top words for the generation of the tag cloud.
-    A new page indicates the list of inactive feeds (with no new published
-    articles since a given number of days).
-
-3.6 (2012-11-08)
-----------------
-    pyAggr3g470r is now running with Python 3.2(.3). It uses CherryPy 3.2.2,
-    BeautifulSoup4, and feedparser3.Your old MongoDB database can be used
-    without any problem.
-
-3.5 (2012-10-28)
-----------------
-    Some minor bugfixes and improvements.
-    An authentication screen has been added, which is useful if pyAggr3g470r
-    is running on an EC2 instance, for example.
-
-3.4 (2012-05-01)
-----------------
-    This version introduces some minor improvements and bugfixes.
-    All features of pyAggr3g470r are now back (with MongoDB).
-
-3.3 (2012-04-16)
-----------------
-    This version introduces minor improvements and a bugfix.
-    Publication dates of articles are now stored as a datetime object.
-    A bug in the script that converts an SQLite database to a MongoDB database
-    is now fixed.
-    A little documentation has been added.
-
-3.2 (2012-03-20)
-----------------
-    A MongoDB database is now used instead of the SQLite database. This change
-    offers a significant performance improvement. The database has been tested
-    with more than 30,000 articles, but version 3.2 is still a test version.
-    A more stable version will arrive soon.
-
-3.1 (2011-11-29)
-----------------
-    A new version of the QR Code module is used. For each article, a QR Code
-    is generated based on the content of the article. If the article is too
-    long, only the article's URL is encoded in the QR Code. For a given
-    string, the algorithm tries the generate as small a QR Code as possible.
-    Minor bugs were fixed.
-
-3.0 (2011-10-25)
-----------------
-    This release introduces exportation of articles to the HTML format and to
-    the PDF format (there is still exportation to ePub).
-    The sharing of articles with delicious.com was replaced by pinboard.in.s
-
-2.9 (2011-08-26)
-----------------
-    Some minor improvements. A bug with the HTML <code> tag bas been fixed.
-    Cleanup was done with Pylint.
-    The test database of pyAggr3g470r contains more than 22000 articles,
-    and it runs perfectly.
-
-2.8 (2011-07-08)
-----------------
-    The feed summary page, which displays general information about a feed,
-    now lets you change the feed metadata (feed logo, feed name, and feed URL
-    if changed). Moreover, this page displays the activity of a feed and other
-    useful information. It is now possible to set a different POD for Diaspora
-    in the configuration file and to share an article with Google +1.
-    A control file to start or stop pyAggr3g470r has been added.
-    From the GUI side, a new transparent CSS tooltip has been introduced in
-    order to preview an article.
-    Finally, some minor performance improvements and bugfixes were made.
-
-2.7 (2011-04-15)
-----------------
-    Minor improvements.
-    It is now possible to set a maximum number of articles to be loaded from
-    the database for each feed (via the management page).
-
-2.6 (2011-03-21)
-----------------
-    This version introduces a new page that displays general information about
-    a feed. There are some minor improvements in the Web interface.
-    The version of pyAggr3g470r for Python 3 is now fully ready and has been
-    tested with Python 3.2.
-
-2.5 (2011-01-19)
-----------------
-    A bug when removing a feed from the data base was fixed.
-    Minor improvements were made for export of articles and the size of HTML
-    forms.
-
-2.4 (2010-12-07)
-----------------
-    The GUI uses more HTML 5 features like HTML5 Forms Validation
-    (email input, URL input), an HTML5 month+year date picker, and a
-    placeholder. From each article it is possible to access the
-    following and previous article (and a new main menu with CSS ToolTip).
-    Articles can now be exported to the EPUB format. Articles loaded from the
-    SQLite base are now stored in memory in a better data structure. With more
-    than 10,000 articles, pyAggr3g470r starts in 3 seconds. Finally, email
-    notifications are now sent with HTML message content and with an
-    alternative plain text version (MIMEMultipart).
-
-2.3 (2010-11-15)
-----------------
-    This version introduces HTML5 Forms Validation and a HTML5 month+year date
-    picker for the history page, which can be used to search for articles.
-    This currently only works with Opera.
-
-2.2 (2010-11-03)
-----------------
-    There is now a third way to export articles from the SQLite base.
-    There is an export method for the wiki DokuWiki (example in the commit
-    message).
-
-2.1 (2010-10-25)
-----------------
-    The export of articles to HTML has been updated, with better output.
-    There are a number of improvements (the search function, generation of
-    tags cloud, display of article content, CSS, bugfixes, etc.).
-    There is a new Wiki.
-
-2.0 (2010-09-03)
-----------------
-    It is now possible to browse articles by year and month with tag clouds
-    (see new screenshots).
-    In addition, URL errors are detected before downloading feeds.
-    There are some improvements in the user interface.
-
-1.9 (2010-09-02)
-----------------
-    The feedgetter module was improved. More details about articles are stored
-    in the database when possile. An attempt is made to get the whole article
-    (a_feed['entries'][i].content[j].value), and in the event of failure,
-    the description/summary is used (a_feed['entries'][i].description).
-
-1.8 (2010-08-25)
-----------------
-    It is now easier to install pyAggr3g470r.
-    There is no longer any need to set any path in the configuration file.
-
-1.7 (2010-07-23)
-----------------
-    This release generates QR codes with URLs of articles, so you can read an
-    article later on your smartphone (or share with friends).
-
-1.6 (2010-07-08)
-----------------
-    It is now possible to automatically add a feed (with the URL of the site),
-    delete an article, delete a feed with all its articles, and to delete all
-    articles from the database.
-    There are also some nice improvements to performance, tested with more
-    than 3000 articles.
-    Furthermore, HTML export of all the articles of the database was improved.
-    You can also export the articles in raw text. Finally, some minor bugs
-    were fixed.
-
-1.5 (2010-07-05)
-----------------
-    Now pyAggr3g470r only works with Python 2.7.
-    OrderedDict objects are used in order to sort the feeds alphabetically in
-    a simple way.
-
-1.4 (2010-06-10)
-----------------
-    It is now possible to remove all articles of a given feed from the SQLite
-    base via the management page. You can also add a feed just with the URL
-    of the Web page. The URL of the feed is obtained by parsing the Web page
-    with the module BeautifulSoup.
-
-1.3 (2010-05-04)
-----------------
-    All articles stored in the SQLite database can now be exported as HTML or
-    raw text via the management page.
-
-1.2 (2010-04-29)
-----------------
-    This version introduces a tag cloud with variable word length.
-    Some improvements were made to the CSS and a bug was fixed.
-
-1.1 (2010-04-15)
-----------------
-    Introduction of a Google Buzz button.
-    It is now possible to mark or unmark articles as favorites.
-
-1.0 (2010-03-23)
-----------------
-    The database of feeds is monitored with the Python gamin module,
-    if present. Otherwise it is done with a classic function.
-    You now have the option to be informed of new articles by email. To
-    receive these notifications, just click on "Stay tuned" for the
-    desired feed(s) at the main page of pyAggr3g470r in the browser.
-
-0.9 (2010-02-28)
-----------------
-    TuxDroid tells you when there are unread articles (this module is
-    independent in case you don't have a TuxDroid). Moreover, the language of
-    articles is detected (thanks to the oice.langdet Python module). This
-    allows you to search for articles by language.
-
-0.8 (2010-02-24)
-----------------
-    It is now possible to share articles with delicious, Digg, reddit,
-    Scoopeo, and Blogmarks.
-    The "Management of feeds" page presents information on the database and
-    statistics on articles (with a histogram). HTML tags are now skipped for
-    the search. Some other improvements were made.
-
-0.7 (2010-02-15)
-----------------
-    It is now possible to search for an article, through the titles and
-    descriptions.
-
-0.6 (2010-02-05)
-----------------
-    Unread articles are now shown in bold. This was implemented using a new
-    field in the SQLite database. New tabs for article descriptions are opened
-    with the _rel=noreferrer_ option in order to separate processes (useful
-    with Chromium). It is now possible to see only unread articles for each feed.
-
-0.5 (2010-02-02)
-----------------
-    It is now possible to fetch feeds manually by clicking on "Fetch all feeds"
-    and/or with cron. Better navigation between feeds and improvements to the
-    SQLite database have been added.
-
-0.4 (2010-02-01)
-----------------
-    Release 0.4. The main page display only 10 articles by feeds.
-    For each feeds a page present the list of all articles. The SQLite base is
-    smaller than before (removed hashed value).
-    A lot of improvements.
-
-0.3 (2010-02-01)
-----------------
-    A new menu was added for faster access to feeds. Some improvements were
-    made to the CSS.
-
-0.2 (2010-01-31)
-----------------
-    Articles are now sorted by date, and it is possible to read just a
-    description of an article. There are some improvements in the code and
-    SQLite base management.
-
-0.1 (2010-01-29)
-----------------
-    First release of pyAggr3g470r.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
new file mode 100644
index 00000000..185dea8a
--- /dev/null
+++ b/CHANGELOG.rst
@@ -0,0 +1,518 @@
+=================
+Release History
+=================
+
+current
+-------
+    New:
+     * Entirely redoing the home page with React; JARR is moving toward a one-page app.
+     * Implementing categories
+    Improvements:
+     * Code re-arrangement: move all code to /src/
+
+6.7 (2015-07-21)
+----------------
+    New:
+     * a filter mechanism for feeds has been added (PR #14);
+     * icon of feeds is now an url retrieved from the feed or the site link (PR #15).
+    Improvements:
+     * improvements for the bookmarklet (PR #16 and PR #18);
+     * performance improvements (display of the /feed page);
+     * enhancements for the two crawlers;
+     * enhancements of the UI (PR #14);
+     * misc changes to the models and controllers.
+
+6.6 (2015-06-02)
+----------------
+    New:
+     * it is now possible to sort the list of articles by feed title or article title;
+     * added base unittests.
+    Improvements:
+     * fixed some minor bugs;
+     * improved the asyncio crawler;
+     * automatically use the right Python executable for the asyncio crawler;
+     * improved controllers (enforcing better use of user_id);
+     * the search is now case insensitive.
+
+6.5.5 (2015-04-22)
+------------------
+    The full text search powered by Whoosh has been removed.
+
+6.5.4 (2015-04-16)
+------------------
+    This release introduces a new config module and a new search functionality.
+    The result of a search is integrated in the main page.
+
+6.5.3 (2015-04-14)
+------------------
+    The fetch call is now dependent on the selected crawling method.
+
+6.5.2 (2015-04-14)
+------------------
+    The look and feel has been globally improved.
+    It is now possible to add a new feed from any page via a dropdown menu.
+
+6.5.1 (2015-04-08)
+------------------
+    Improvements:
+     * improvements on the controllers;
+     * the minimum error count is now specified in the configuration file.
+
+6.5 (2015-04-07)
+----------------
+    Improvements:
+     * new CSS;
+     * improved installation script;
+     * it is now possible to delete all duplicate articles with one HTTP delete request.
+
+6.4 (2015-03-17)
+----------------
+    New:
+     * a new page 'history' to explore your database of articles.
+    Changes:
+     * updated documentation;
+     * minor improvements.
+    Fixes:
+     * changed the type of the column 'last_modified' to string.
+
+6.3 (2015-03-08)
+----------------
+    New:
+    * a new architecture with base for controllers;
+    * new, more complete RESTful API;
+    * a crawler handling errors and cache control;
+    * the new crawler takes advantage of the RESTful API
+    (can be run on the client side).
+
+6.2 (2015-02-26)
+----------------
+    The system of email notifications for new articles has been removed.
+    This feature was hardly used.
+
+6.1 (2015-02-23)
+----------------
+    Improvements: articles are now identified with the id provided
+    by the RSS/ATOM feed.
+    Prevent BeautifulSoup4 from adding extra '<html><body>' tags to
+    the soup with the 'lxml' parser.
+    Indexation is now used with the new crawler.
+    The documentation has been updated.
+
+6.0 (2015-02-08)
+----------------
+    New: pyAggr3g470r is now working with Python 3.4. Tested on Heroku
+    with Python 3.4.2.
+    Improvements: The feed crawler uses the PEP 3156 (asyncio). The action
+    buttons are now on the left. It is easier to mark an article as read.
+
+5.7.0 (2014-11-20)
+------------------
+    Improvements: major improvements of the email notifications system.
+    Notifications are now sent through Postmark (for example for Heroku)
+    or a standard SMTP server.
+    Bug fix: activation key was too long for the database column.
+
+5.6.6 (2014-09-24)
+------------------
+    Improvements: Significant performance improvement for the views
+    /favorites and /unread. The performance problem has been introduced
+    with the release 5.6.5.
+
+5.6.5 (2014-09-15)
+------------------
+    This release fixes a major bug introduced with the version 0.9.7 of SQLAlchemy
+    (conflicts between persistent and transient SQLAlchemy objects).
+
+5.6.4 (2014-09-12)
+------------------
+    Translations have been updated.
+    Gravatar extension has been removed.
+    Minor fix.
+
+5.6.3 (2014-08-27)
+------------------
+    News: It is now possible to delete articles older than a given number
+    of weeks. This can be done via the management page.
+    A new environment variable makes it possible to activate or deactivate the
+    self-registration.
+    Changes: translations were updated. Some minor bugfixes. Improved
+    deployment instructions in the README.
+
+5.6.2 (2014-08-10)
+------------------
+    Minor improvements: Articles are automatically retrieved after the import
+    of an OPML file.
+    When displaying all articles (unread + read), titles of unread articles
+    are emphasized in bold.
+    A new tab is opened when clicking on the title of an article.
+    New: pyAggr3g470r can be deployed with the Heroku auto deploy button.
+
+5.6.1 (2014-07-13)
+------------------
+    Performance improvements: faster database insertion of articles by
+    the crawler and loading of the management page.
+    Minor bug fixes.
+
+5.6 (2014-07-05)
+----------------
+    pyAggr3g470r now has a RESTful JSON API which makes it possible to manage Feed and
+    Article objects. The API is documented in the README file.
+    The main page is using a subset of this API with jQuery HTTP requests.
+
+5.5 (2014-06-14)
+----------------
+    This release introduces a redesigned homepage which loads much faster and
+    is easier to read. pyAggr3g470r can now be run by Apache.
+    Adding a feed no longer requires a title and a site link.
+
+5.4 (2014-05-28)
+----------------
+    This version makes it possible for a user to export all of their feeds and
+    articles as a JSON file for later restoration.
+
+5.3 (2014-05-23)
+----------------
+    This release introduces some UI improvements, especially for the home page.
+
+5.2 (2014-05-16)
+----------------
+    This release adds minor bug fixes and UI improvements.
+
+5.1 (2014-05-13)
+----------------
+    When deployed on Heroku, the platform now uses the Postmark service to
+    send account confirmation emails to users. It is no longer required to
+    enter a first name and a last name to create an account.
+
+5.0 (2014-05-04)
+----------------
+    pyAggr3g470r is now translated into English and French. Improvements
+    concerning the news retriever and the Whoosh search functionality have
+    been made. The user can now export all articles in JSON format.
+    The user of the platform now has the possibility to delete his or her
+    account.
+
+4.9 (2014-04-24)
+----------------
+    This version introduces minor improvements to the Jinja templates and
+    bugfixes (relating to the import of OPML files with sub-categories and
+    relating to the Whoosh index generation).
+
+4.8 (2014-04-13)
+----------------
+    Feeds are now retrieved in a separated process with the Gevent library.
+    This offers the best performance on Heroku.
+
+4.7 (2014-04-12)
+----------------
+    pyAggr3g470r can now be deployed on Heroku or on a traditional server.
+    Moreover, several users can use an instance of pyAggr3g470r. A platform is
+    managed by the administrator, a user with specific rights.
+
+4.6 (2014-02-09)
+----------------
+    This release introduces the import of OPML files of subscriptions.
+    Minor improvements have been made to the templates.
+
+4.5 (2014-01-29)
+----------------
+    This release introduces a one step installation process with a simple
+    script. Minor improvements to the feedgetter module have been introduced
+    (the feed description is now stored in the database). Miscellaneous
+    improvements to the Jinja templates. Finally, more configuration options
+    are now offered to the user.
+
+4.4 (2013-12-27)
+----------------
+    This version introduces some improvements for the feedgetter module
+    including automatic retrieval of the URL behind feedproxy.google.com,
+    and support for configuring the user agent and proxy. Minor improvements
+    were made to the MongoEngine models. Notifications are displayed with
+    Flask flash messages.
+
+4.3 (2013-12-03)
+----------------
+    With this release, the user is able to update her personal information.
+    It is now possible to enable/disable the checking of updates for a feed.
+    Some performance improvements and user interface optimizations have been
+    done.
+
+4.2 (2013-11-10)
+----------------
+    This is the first release of the new version of pyAggr3g470r.
+    The code has been rewritten with the Flask microframework and the
+    Bootstrap frontend framework.
+
+4.1 (2013-08-11)
+----------------
+    HTTP proxy support has been added for the fetching of feeds. This is
+    useful, for example, if you are using Privoxy/Tor.
+
+4.0 (2013-06-25)
+----------------
+    Searching of articles is now achieved with Whoosh, a fast full-text
+    indexing and searching library.
+
+3.9 (2013-04-14)
+----------------
+    The code has been tested and ported to Python 3.3.1. Some minor bugs have
+    been fixed, with a lot of improvements concerning the Mako templates,
+    MongoDB database management, and management of exceptions.
+
+3.8 (2013-01-12)
+----------------
+    This release introduces a reworked management page; it is now possible to
+    change the username and password via this page.
+    Some improvements concerning the HTML export of the database have been
+    made. Finally, indexed MongoDB full text searching provides a much faster
+    search.
+
+3.7 (2012-12-29)
+----------------
+    pyAggr3g470r is now using the Mako template library.
+    Performance improvements for the MongoDB database have been made, and some
+    minor bugfixes. Stop words (a, of, the, an, for...) are now ignored when
+    calculating top words for the generation of the tag cloud.
+    A new page indicates the list of inactive feeds (with no new published
+    articles since a given number of days).
+
+3.6 (2012-11-08)
+----------------
+    pyAggr3g470r is now running with Python 3.2(.3). It uses CherryPy 3.2.2,
+    BeautifulSoup4, and feedparser3. Your old MongoDB database can be used
+    without any problem.
+
+3.5 (2012-10-28)
+----------------
+    Some minor bugfixes and improvements.
+    An authentication screen has been added, which is useful if pyAggr3g470r
+    is running on an EC2 instance, for example.
+
+3.4 (2012-05-01)
+----------------
+    This version introduces some minor improvements and bugfixes.
+    All features of pyAggr3g470r are now back (with MongoDB).
+
+3.3 (2012-04-16)
+----------------
+    This version introduces minor improvements and a bugfix.
+    Publication dates of articles are now stored as a datetime object.
+    A bug in the script that converts an SQLite database to a MongoDB database
+    is now fixed.
+    A little documentation has been added.
+
+3.2 (2012-03-20)
+----------------
+    A MongoDB database is now used instead of the SQLite database. This change
+    offers a significant performance improvement. The database has been tested
+    with more than 30,000 articles, but version 3.2 is still a test version.
+    A more stable version will arrive soon.
+
+3.1 (2011-11-29)
+----------------
+    A new version of the QR Code module is used. For each article, a QR Code
+    is generated based on the content of the article. If the article is too
+    long, only the article's URL is encoded in the QR Code. For a given
+    string, the algorithm tries to generate as small a QR Code as possible.
+    Minor bugs were fixed.
+
+3.0 (2011-10-25)
+----------------
+    This release introduces exportation of articles to the HTML format and to
+    the PDF format (there is still exportation to ePub).
+    The sharing of articles with delicious.com was replaced by pinboard.in.
+
+2.9 (2011-08-26)
+----------------
+    Some minor improvements. A bug with the HTML <code> tag has been fixed.
+    Cleanup was done with Pylint.
+    The test database of pyAggr3g470r contains more than 22000 articles,
+    and it runs perfectly.
+
+2.8 (2011-07-08)
+----------------
+    The feed summary page, which displays general information about a feed,
+    now lets you change the feed metadata (feed logo, feed name, and feed URL
+    if changed). Moreover, this page displays the activity of a feed and other
+    useful information. It is now possible to set a different POD for Diaspora
+    in the configuration file and to share an article with Google +1.
+    A control file to start or stop pyAggr3g470r has been added.
+    From the GUI side, a new transparent CSS tooltip has been introduced in
+    order to preview an article.
+    Finally, some minor performance improvements and bugfixes were made.
+
+2.7 (2011-04-15)
+----------------
+    Minor improvements.
+    It is now possible to set a maximum number of articles to be loaded from
+    the database for each feed (via the management page).
+
+2.6 (2011-03-21)
+----------------
+    This version introduces a new page that displays general information about
+    a feed. There are some minor improvements in the Web interface.
+    The version of pyAggr3g470r for Python 3 is now fully ready and has been
+    tested with Python 3.2.
+
+2.5 (2011-01-19)
+----------------
+    A bug when removing a feed from the database was fixed.
+    Minor improvements were made for export of articles and the size of HTML
+    forms.
+
+2.4 (2010-12-07)
+----------------
+    The GUI uses more HTML 5 features like HTML5 Forms Validation
+    (email input, URL input), an HTML5 month+year date picker, and a
+    placeholder. From each article it is possible to access the
+    following and previous article (and a new main menu with CSS ToolTip).
+    Articles can now be exported to the EPUB format. Articles loaded from the
+    SQLite base are now stored in memory in a better data structure. With more
+    than 10,000 articles, pyAggr3g470r starts in 3 seconds. Finally, email
+    notifications are now sent with HTML message content and with an
+    alternative plain text version (MIMEMultipart).
+
+2.3 (2010-11-15)
+----------------
+    This version introduces HTML5 Forms Validation and an HTML5 month+year date
+    picker for the history page, which can be used to search for articles.
+    This currently only works with Opera.
+
+2.2 (2010-11-03)
+----------------
+    There is now a third way to export articles from the SQLite base.
+    There is an export method for the wiki DokuWiki (example in the commit
+    message).
+
+2.1 (2010-10-25)
+----------------
+    The export of articles to HTML has been updated, with better output.
+    There are a number of improvements (the search function, generation of
+    tag cloud, display of article content, CSS, bugfixes, etc.).
+    There is a new Wiki.
+
+2.0 (2010-09-03)
+----------------
+    It is now possible to browse articles by year and month with tag clouds
+    (see new screenshots).
+    In addition, URL errors are detected before downloading feeds.
+    There are some improvements in the user interface.
+
+1.9 (2010-09-02)
+----------------
+    The feedgetter module was improved. More details about articles are stored
+    in the database when possible. An attempt is made to get the whole article
+    (a_feed['entries'][i].content[j].value), and in the event of failure,
+    the description/summary is used (a_feed['entries'][i].description).
+
+1.8 (2010-08-25)
+----------------
+    It is now easier to install pyAggr3g470r.
+    There is no longer any need to set any path in the configuration file.
+
+1.7 (2010-07-23)
+----------------
+    This release generates QR codes with URLs of articles, so you can read an
+    article later on your smartphone (or share with friends).
+
+1.6 (2010-07-08)
+----------------
+    It is now possible to automatically add a feed (with the URL of the site),
+    delete an article, delete a feed with all its articles, and to delete all
+    articles from the database.
+    There are also some nice improvements to performance, tested with more
+    than 3000 articles.
+    Furthermore, HTML export of all the articles of the database was improved.
+    You can also export the articles in raw text. Finally, some minor bugs
+    were fixed.
+
+1.5 (2010-07-05)
+----------------
+    Now pyAggr3g470r only works with Python 2.7.
+    OrderedDict objects are used in order to sort the feeds alphabetically in
+    a simple way.
+
+1.4 (2010-06-10)
+----------------
+    It is now possible to remove all articles of a given feed from the SQLite
+    base via the management page. You can also add a feed just with the URL
+    of the Web page. The URL of the feed is obtained by parsing the Web page
+    with the module BeautifulSoup.
+
+1.3 (2010-05-04)
+----------------
+    All articles stored in the SQLite database can now be exported as HTML or
+    raw text via the management page.
+
+1.2 (2010-04-29)
+----------------
+    This version introduces a tag cloud with variable word length.
+    Some improvements were made to the CSS and a bug was fixed.
+
+1.1 (2010-04-15)
+----------------
+    Introduction of a Google Buzz button.
+    It is now possible to mark or unmark articles as favorites.
+
+1.0 (2010-03-23)
+----------------
+    The database of feeds is monitored with the Python gamin module,
+    if present. Otherwise it is done with a classic function.
+    You now have the option to be informed of new articles by email. To
+    receive these notifications, just click on "Stay tuned" for the
+    desired feed(s) at the main page of pyAggr3g470r in the browser.
+
+0.9 (2010-02-28)
+----------------
+    TuxDroid tells you when there are unread articles (this module is
+    independent in case you don't have a TuxDroid). Moreover, the language of
+    articles is detected (thanks to the oice.langdet Python module). This
+    allows you to search for articles by language.
+
+0.8 (2010-02-24)
+----------------
+    It is now possible to share articles with delicious, Digg, reddit,
+    Scoopeo, and Blogmarks.
+    The "Management of feeds" page presents information on the database and
+    statistics on articles (with a histogram). HTML tags are now skipped for
+    the search. Some other improvements were made.
+
+0.7 (2010-02-15)
+----------------
+    It is now possible to search for an article, through the titles and
+    descriptions.
+
+0.6 (2010-02-05)
+----------------
+    Unread articles are now shown in bold. This was implemented using a new
+    field in the SQLite database. New tabs for article descriptions are opened
+    with the _rel=noreferrer_ option in order to separate processes (useful
+    with Chromium). It is now possible to see only unread articles for each feed.
+
+0.5 (2010-02-02)
+----------------
+    It is now possible to fetch feeds manually by clicking on "Fetch all feeds"
+    and/or with cron. Better navigation between feeds and improvements to the
+    SQLite database have been added.
+
+0.4 (2010-02-01)
+----------------
+    Release 0.4. The main page displays only 10 articles per feed.
+    For each feed, a page presents the list of all articles. The SQLite base
+    is smaller than before (removed hashed value).
+    A lot of improvements.
+
+0.3 (2010-02-01)
+----------------
+    A new menu was added for faster access to feeds. Some improvements were
+    made to the CSS.
+
+0.2 (2010-01-31)
+----------------
+    Articles are now sorted by date, and it is possible to read just a
+    description of an article. There are some improvements in the code and
+    SQLite base management.
+
+0.1 (2010-01-29)
+----------------
+    First release of pyAggr3g470r.
diff --git a/README.rst b/README.rst
index fbfda039..5d9733f8 100644
--- a/README.rst
+++ b/README.rst
@@ -1,15 +1,14 @@
-++++
+====
 JARR
-++++
+====
 
 Presentation
-============
+------------
 
-`JARR (Just Another RSS Reader) <https://github.com/JARR-aggregator/JARR>`_ is a
-web-based news aggregator.
+`JARR (Just Another RSS Reader) <https://github.com/JARR-aggregator/JARR>`_ is a web-based news aggregator and reader.
 
 Main features
-=============
+-------------
 
 * can be easily deployed on Heroku or on a traditional server;
 * multiple users can use a JARR instance;
@@ -21,37 +20,33 @@ Main features
 * detection of inactive feeds;
 * share articles with Google +, Pinboard and reddit.
 
-The core technologies are `Flask <http://flask.pocoo.org>`_,
-`asyncio <https://www.python.org/dev/peps/pep-3156/>`_ and
-`SQLAlchemy <http://www.sqlalchemy.org>`_.
+The core technologies are `Flask <http://flask.pocoo.org>`_, `asyncio <https://www.python.org/dev/peps/pep-3156/>`_ and `SQLAlchemy <http://www.sqlalchemy.org>`_.
 
 Python 3.5 is recommended.
 
 Documentation
-=============
+-------------
 
 A documentation is available `here <https://jarr.readthedocs.org>`_ and provides
 different ways to install JARR.
 
 Internationalization
-====================
+--------------------
 
 JARR is translated into English and French.
 
 Donation
-========
+--------
 
-If you wish and if you like *JARR*, you can donate via bitcoin
-`1GVmhR9fbBeEh7rP1qNq76jWArDdDQ3otZ <https://blockexplorer.com/address/1GVmhR9fbBeEh7rP1qNq76jWArDdDQ3otZ>`_.
+If you wish and if you like *JARR*, you can donate via bitcoin `1GVmhR9fbBeEh7rP1qNq76jWArDdDQ3otZ <https://blockexplorer.com/address/1GVmhR9fbBeEh7rP1qNq76jWArDdDQ3otZ>`_.
 Thank you!
 
 License
-=======
+-------
 
-`JARR <https://github.com/JARR-aggregator/JARR>`_
-is under the `GNU Affero General Public License version 3 <https://www.gnu.org/licenses/agpl-3.0.html>`_.
+`JARR <https://github.com/JARR-aggregator/JARR>`_ is under the `GNU Affero General Public License version 3 <https://www.gnu.org/licenses/agpl-3.0.html>`_.
 
 Contact
-=======
+-------
 
 `My home page <https://www.cedricbonhomme.org>`_.
diff --git a/src/crawler.py b/src/crawler.py
deleted file mode 100644
index 0598c418..00000000
--- a/src/crawler.py
+++ /dev/null
@@ -1,168 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -
-
-# jarr - A Web based news aggregator.
-# Copyright (C) 2010-2015  Cédric Bonhomme - https://www.JARR-aggregator.org
-#
-# For more information : https://github.com/JARR-aggregator/JARR/
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-__author__ = "Cedric Bonhomme"
-__version__ = "$Revision: 3.3 $"
-__date__ = "$Date: 2010/09/02 $"
-__revision__ = "$Date: 2015/12/07 $"
-__copyright__ = "Copyright (c) Cedric Bonhomme"
-__license__ = "AGPLv3"
-
-import asyncio
-import logging
-import feedparser
-import dateutil.parser
-from datetime import datetime
-from sqlalchemy import or_
-
-import conf
-from bootstrap import db
-from web.models import User
-from web.controllers import FeedController, ArticleController
-from web.lib.feed_utils import construct_feed_from, is_parsing_ok
-from web.lib.article_utils import construct_article, extract_id
-
-logger = logging.getLogger(__name__)
-
-sem = asyncio.Semaphore(5)
-
-import ssl
-try:
-    _create_unverified_https_context = ssl._create_unverified_context
-except AttributeError:
-    # Legacy Python that doesn't verify HTTPS certificates by default
-    pass
-else:
-    # Handle target environment that doesn't support HTTPS verification
-    ssl._create_default_https_context = _create_unverified_https_context
-
-
-async def get(*args, **kwargs):
-    #kwargs["connector"] = aiohttp.TCPConnector(verify_ssl=False)
-    try:
-        data = feedparser.parse(args[0])
-        return data
-    except Exception as e:
-        raise e
-
-
-async def parse_feed(user, feed):
-    """
-    Fetch a feed.
-    Update the feed and return the articles.
-    """
-    parsed_feed = None
-    up_feed = {}
-    articles = []
-    with (await sem):
-        try:
-            parsed_feed = await get(feed.link)
-        except Exception as e:
-            up_feed['last_error'] = str(e)
-            up_feed['error_count'] = feed.error_count + 1
-        finally:
-            up_feed['last_retrieved'] = datetime.now(dateutil.tz.tzlocal())
-            if parsed_feed is None:
-                FeedController().update({'id': feed.id}, up_feed)
-                return
-
-    if not is_parsing_ok(parsed_feed):
-        up_feed['last_error'] = str(parsed_feed['bozo_exception'])
-        up_feed['error_count'] = feed.error_count + 1
-        FeedController().update({'id': feed.id}, up_feed)
-        return
-    if parsed_feed['entries'] != []:
-        articles = parsed_feed['entries']
-
-    up_feed['error_count'] = 0
-    up_feed['last_error'] = ""
-
-    # Feed informations
-    construct_feed_from(feed.link, parsed_feed).update(up_feed)
-    if feed.title and 'title' in up_feed:
-        # do not override the title set by the user
-        del up_feed['title']
-    FeedController().update({'id': feed.id}, up_feed)
-
-    return articles
-
-
-async def insert_database(user, feed):
-
-    articles = await parse_feed(user, feed)
-    if None is articles:
-        return []
-
-    logger.debug('inserting articles for {}'.format(feed.title))
-
-    logger.info("Database insertion...")
-    new_articles = []
-    art_contr = ArticleController(user.id)
-    for article in articles:
-        exist = art_contr.read(feed_id=feed.id,
-                        **extract_id(article)).count() != 0
-        if exist:
-            logger.debug("Article %r (%r) already in the database.",
-                         article['title'], article['link'])
-            continue
-        article = construct_article(article, feed)
-        try:
-            new_articles.append(art_contr.create(**article))
-            logger.info("New article % (%r) added.",
-                        article['title'], article['link'])
-        except Exception:
-            logger.exception("Error when inserting article in database:")
-            continue
-    return new_articles
-
-
-async def init_process(user, feed):
-    # Fetch the feed and insert new articles in the database
-    articles = await insert_database(user, feed)
-    logger.debug('inserted articles for %s', feed.title)
-    return articles
-
-
-def retrieve_feed(loop, user, feed_id=None):
-    """
-    Launch the processus.
-    """
-    logger.info("Starting to retrieve feeds.")
-
-    # Get the list of feeds to fetch
-    user = User.query.filter(User.email == user.email).first()
-    feeds = [feed for feed in user.feeds if
-             feed.error_count <= conf.DEFAULT_MAX_ERROR and feed.enabled]
-    if feed_id is not None:
-        feeds = [feed for feed in feeds if feed.id == feed_id]
-
-    if feeds == []:
-        return
-
-    # Launch the process for all the feeds
-    tasks = [asyncio.ensure_future(init_process(user, feed)) for feed in feeds]
-
-    try:
-        loop.run_until_complete(asyncio.wait(tasks))
-    except Exception:
-        logger.exception('an error occured')
-
-    logger.info("All articles retrieved. End of the processus.")
diff --git a/src/crawler/classic_crawler.py b/src/crawler/classic_crawler.py
new file mode 100644
index 00000000..0598c418
--- /dev/null
+++ b/src/crawler/classic_crawler.py
@@ -0,0 +1,168 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -
+
+# jarr - A Web based news aggregator.
+# Copyright (C) 2010-2015  Cédric Bonhomme - https://www.JARR-aggregator.org
+#
+# For more information : https://github.com/JARR-aggregator/JARR/
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+__author__ = "Cedric Bonhomme"
+__version__ = "$Revision: 3.3 $"
+__date__ = "$Date: 2010/09/02 $"
+__revision__ = "$Date: 2015/12/07 $"
+__copyright__ = "Copyright (c) Cedric Bonhomme"
+__license__ = "AGPLv3"
+
+import asyncio
+import logging
+import feedparser
+import dateutil.parser
+from datetime import datetime
+from sqlalchemy import or_
+
+import conf
+from bootstrap import db
+from web.models import User
+from web.controllers import FeedController, ArticleController
+from web.lib.feed_utils import construct_feed_from, is_parsing_ok
+from web.lib.article_utils import construct_article, extract_id
+
+logger = logging.getLogger(__name__)
+
+sem = asyncio.Semaphore(5)
+
+import ssl
+try:
+    _create_unverified_https_context = ssl._create_unverified_context
+except AttributeError:
+    # Legacy Python that doesn't verify HTTPS certificates by default
+    pass
+else:
+    # Handle target environment that doesn't support HTTPS verification
+    ssl._create_default_https_context = _create_unverified_https_context
+
+
+async def get(*args, **kwargs):
+    #kwargs["connector"] = aiohttp.TCPConnector(verify_ssl=False)
+    try:
+        data = feedparser.parse(args[0])
+        return data
+    except Exception as e:
+        raise e
+
+
+async def parse_feed(user, feed):
+    """
+    Fetch a feed.
+    Update the feed and return the articles.
+    """
+    parsed_feed = None
+    up_feed = {}
+    articles = []
+    with (await sem):
+        try:
+            parsed_feed = await get(feed.link)
+        except Exception as e:
+            up_feed['last_error'] = str(e)
+            up_feed['error_count'] = feed.error_count + 1
+        finally:
+            up_feed['last_retrieved'] = datetime.now(dateutil.tz.tzlocal())
+            if parsed_feed is None:
+                FeedController().update({'id': feed.id}, up_feed)
+                return
+
+    if not is_parsing_ok(parsed_feed):
+        up_feed['last_error'] = str(parsed_feed['bozo_exception'])
+        up_feed['error_count'] = feed.error_count + 1
+        FeedController().update({'id': feed.id}, up_feed)
+        return
+    if parsed_feed['entries'] != []:
+        articles = parsed_feed['entries']
+
+    up_feed['error_count'] = 0
+    up_feed['last_error'] = ""
+
+    # Feed informations
+    construct_feed_from(feed.link, parsed_feed).update(up_feed)
+    if feed.title and 'title' in up_feed:
+        # do not override the title set by the user
+        del up_feed['title']
+    FeedController().update({'id': feed.id}, up_feed)
+
+    return articles
+
+
+async def insert_database(user, feed):
+    """Fetch *feed* and insert its unknown articles; return the created ones."""
+    articles = await parse_feed(user, feed)
+    if articles is None:  # parse_feed returned nothing: fetch/parsing failed
+        return []
+
+    logger.debug('inserting articles for %s', feed.title)
+
+    logger.info("Database insertion...")
+    new_articles = []
+    art_contr = ArticleController(user.id)
+    for article in articles:
+        exist = art_contr.read(feed_id=feed.id,
+                        **extract_id(article)).count() != 0
+        if exist:
+            logger.debug("Article %r (%r) already in the database.",
+                         article['title'], article['link'])
+            continue
+        article = construct_article(article, feed)
+        try:
+            new_articles.append(art_contr.create(**article))
+            logger.info("New article %r (%r) added.",
+                        article['title'], article['link'])
+        except Exception:  # best effort: one bad entry must not stop the feed
+            logger.exception("Error when inserting article in database:")
+            continue
+    return new_articles
+
+
+async def init_process(user, feed):
+    # Fetch the feed and insert new articles in the database
+    articles = await insert_database(user, feed)
+    logger.debug('inserted articles for %s', feed.title)
+    return articles
+
+
+def retrieve_feed(loop, user, feed_id=None):
+    """
+    Launch the processus.
+    """
+    logger.info("Starting to retrieve feeds.")
+
+    # Get the list of feeds to fetch
+    user = User.query.filter(User.email == user.email).first()
+    feeds = [feed for feed in user.feeds if
+             feed.error_count <= conf.DEFAULT_MAX_ERROR and feed.enabled]
+    if feed_id is not None:
+        feeds = [feed for feed in feeds if feed.id == feed_id]
+
+    if feeds == []:
+        return
+
+    # Launch the process for all the feeds
+    tasks = [asyncio.ensure_future(init_process(user, feed)) for feed in feeds]
+
+    try:
+        loop.run_until_complete(asyncio.wait(tasks))
+    except Exception:
+        logger.exception('an error occured')
+
+    logger.info("All articles retrieved. End of the processus.")
diff --git a/src/crawler/http_crawler.py b/src/crawler/http_crawler.py
new file mode 100644
index 00000000..f480fe96
--- /dev/null
+++ b/src/crawler/http_crawler.py
@@ -0,0 +1,251 @@
+"""
+Here's a summary of how it works:
+
+CrawlerScheduler.run
+    will retrieve a list of feeds to be refreshed and pass the result to
+CrawlerScheduler.callback
+    which will retrieve each feed and process the result with
+FeedCrawler.callback
+    which will interpret the result (status_code, etag), collect ids
+    and match them against pyagg, which will cause
+PyAggUpdater.callback
+    to create the missing entries
+"""
+
+import time
+import conf
+import json
+import logging
+import feedparser
+from datetime import datetime, timedelta
+from time import strftime, gmtime
+from concurrent.futures import ThreadPoolExecutor
+from requests_futures.sessions import FuturesSession
+from web.lib.utils import default_handler, to_hash
+from web.lib.feed_utils import construct_feed_from
+from web.lib.article_utils import extract_id, construct_article
+
+logger = logging.getLogger(__name__)
+logging.captureWarnings(True)
+API_ROOT = "api/v2.0/"
+
+
+class AbstractCrawler:
+
+    def __init__(self, auth, pool=None, session=None):
+        self.auth = auth
+        self.pool = pool or ThreadPoolExecutor(max_workers=conf.NB_WORKER)
+        self.session = session or FuturesSession(executor=self.pool)
+        self.session.verify = False
+        self.url = conf.PLATFORM_URL
+
+    def query_pyagg(self, method, urn, data=None):
+        """A wrapper for internal call, method should be ones you can find
+        on requests (header, post, get, options, ...), urn the distant
+        resources you want to access on pyagg, and data, the data you wanna
+        transmit."""
+        if data is None:
+            data = {}
+        method = getattr(self.session, method)
+        return method("%s%s%s" % (self.url, API_ROOT, urn),
+                      auth=self.auth, data=json.dumps(data,
+                                                      default=default_handler),
+                      headers={'Content-Type': 'application/json',
+                               'User-Agent': conf.USER_AGENT})
+
+    def wait(self, max_wait=300, checks=5, wait_for=2):
+        checked, second_waited = 0, 0
+        while True:
+            time.sleep(wait_for)
+            second_waited += wait_for
+            if second_waited > max_wait:
+                logger.warn('Exiting after %d seconds', second_waited)
+                break
+            if self.pool._work_queue.qsize():
+                checked = 0
+                continue
+            checked += 1
+            if checked == checks:
+                break
+
+
+class PyAggUpdater(AbstractCrawler):
+
+    def __init__(self, feed, entries, headers, parsed_feed,
+                 auth, pool=None, session=None):
+        self.feed = feed
+        self.entries = entries
+        self.headers = headers
+        self.parsed_feed = parsed_feed
+        super().__init__(auth, pool, session)
+
+    def callback(self, response):
+        """Will process the result from the challenge, creating missing article
+        and updating the feed"""
+        article_created = False
+        if response.result().status_code != 204:
+            results = response.result().json()
+            logger.debug('%r %r - %d entries were not matched '
+                         'and will be created',
+                         self.feed['id'], self.feed['title'], len(results))
+            for id_to_create in results:
+                article_created = True
+                entry = construct_article(
+                        self.entries[tuple(sorted(id_to_create.items()))],
+                        self.feed)
+                logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
+                            self.feed['title'], entry['title'],
+                            entry['user_id'], id_to_create)
+                self.query_pyagg('post', 'article', entry)
+
+        logger.debug('%r %r - updating feed etag %r last_mod %r',
+                     self.feed['id'], self.feed['title'],
+                     self.headers.get('etag', ''),
+                     self.headers.get('last-modified', ''))
+
+        up_feed = {'error_count': 0, 'last_error': None,
+                   'etag': self.headers.get('etag', ''),
+                   'last_modified': self.headers.get('last-modified',
+                                    strftime('%a, %d %b %Y %X %Z', gmtime()))}
+        fresh_feed = construct_feed_from(url=self.feed['link'],
+                                         fp_parsed=self.parsed_feed)
+        for key in ('description', 'site_link', 'icon_url'):
+            if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
+                up_feed[key] = fresh_feed[key]
+        if not self.feed.get('title'):
+            up_feed['title'] = fresh_feed.get('title', '')
+        up_feed['user_id'] = self.feed['user_id']
+        # re-getting that feed earlier since new entries appeared
+        if article_created:
+            up_feed['last_retrieved'] \
+                    = (datetime.now() - timedelta(minutes=45)).isoformat()
+
+        diff_keys = {key for key in up_feed
+                     if up_feed[key] != self.feed.get(key)}
+        if not diff_keys:
+            return  # no change in the feed, no update
+        if not article_created and diff_keys == {'last_modified', 'etag'}:
+            return  # meaningless if no new article has been published
+        logger.info('%r %r - pushing feed attrs %r',
+                self.feed['id'], self.feed['title'],
+                {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
+                 for key in up_feed if up_feed[key] != self.feed.get(key)})
+
+        self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
+
+
+class FeedCrawler(AbstractCrawler):
+
+    def __init__(self, feed, auth, pool=None, session=None):
+        self.feed = feed
+        super().__init__(auth, pool, session)
+
+    def clean_feed(self):
+        """Will reset the errors counters on a feed that have known errors"""
+        if self.feed.get('error_count') or self.feed.get('last_error'):
+            self.query_pyagg('put', 'feed/%d' % self.feed['id'],
+                             {'error_count': 0, 'last_error': ''})
+
+    def callback(self, response):
+        """will fetch the feed and interprete results (304, etag) or will
+        challenge pyagg to compare gotten entries with existing ones"""
+        try:
+            response = response.result()
+            response.raise_for_status()
+        except Exception as error:
+            error_count = self.feed['error_count'] + 1
+            logger.exception('%r %r - an error occured while fetching '
+                    'feed; bumping  error count to %r',
+                    self.feed['id'], self.feed['title'], error_count)
+            future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
+                                      {'error_count': error_count,
+                                       'last_error': str(error),
+                                       'user_id': self.feed['user_id']})
+            return
+
+        if response.status_code == 304:
+            logger.info("%r %r - feed responded with 304",
+                        self.feed['id'], self.feed['title'])
+            self.clean_feed()
+            return
+        if 'etag' not in response.headers:
+            logger.debug('%r %r - manually generating etag',
+                         self.feed['id'], self.feed['title'])
+            response.headers['etag'] = 'pyagg/"%s"' % to_hash(response.text)
+        if response.headers['etag'] and self.feed['etag'] \
+                and response.headers['etag'] == self.feed['etag']:
+            if 'pyagg' in self.feed['etag']:
+                logger.info("%r %r - calculated hash matches (%d)",
+                            self.feed['id'], self.feed['title'],
+                            response.status_code)
+            else:
+                logger.info("%r %r - feed responded with same etag (%d)",
+                            self.feed['id'], self.feed['title'],
+                            response.status_code)
+            self.clean_feed()
+            return
+        else:
+            logger.debug('%r %r - etag mismatch %r != %r',
+                         self.feed['id'], self.feed['title'],
+                         response.headers['etag'], self.feed['etag'])
+        logger.info('%r %r - cache validation failed, challenging entries',
+                    self.feed['id'], self.feed['title'])
+
+        ids, entries = [], {}
+        parsed_response = feedparser.parse(response.content)
+        for entry in parsed_response['entries']:
+            entry_ids = extract_id(entry)
+            entry_ids['feed_id'] = self.feed['id']
+            entry_ids['user_id'] = self.feed['user_id']
+            entries[tuple(sorted(entry_ids.items()))] = entry
+            ids.append(entry_ids)
+        logger.debug('%r %r - found %d entries %r',
+                     self.feed['id'], self.feed['title'], len(ids), ids)
+        future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
+        updater = PyAggUpdater(self.feed, entries, response.headers,
+                               parsed_response,
+                               self.auth, self.pool, self.session)
+        future.add_done_callback(updater.callback)
+
+
+class CrawlerScheduler(AbstractCrawler):
+
+    def __init__(self, username, password, pool=None, session=None):
+        self.auth = (username, password)
+        super(CrawlerScheduler, self).__init__(self.auth, pool, session)
+
+    def prepare_headers(self, feed):
+        """For a known feed, will construct some header dictionnary"""
+        headers = {'User-Agent': conf.USER_AGENT}
+        if feed.get('last_modified'):
+            headers['If-Modified-Since'] = feed['last_modified']
+        if feed.get('etag') and 'pyagg' not in feed['etag']:
+            headers['If-None-Match'] = feed['etag']
+        logger.debug('%r %r - calculated headers %r',
+                     feed['id'], feed['title'], headers)
+        return headers
+
+    def callback(self, response):
+        """processes feeds that need to be fetched"""
+        response = response.result()
+        response.raise_for_status()
+        if response.status_code == 204:
+            logger.debug("No feed to fetch")
+            return
+        feeds = response.json()
+        logger.debug('%d to fetch %r', len(feeds), feeds)
+        for feed in feeds:
+            logger.debug('%r %r - fetching resources',
+                         feed['id'], feed['title'])
+            future = self.session.get(feed['link'],
+                                      headers=self.prepare_headers(feed))
+
+            feed_crwlr = FeedCrawler(feed, self.auth, self.pool, self.session)
+            future.add_done_callback(feed_crwlr.callback)
+
+    def run(self, **kwargs):
+        """entry point, will retreive feeds to be fetch
+        and launch the whole thing"""
+        logger.debug('retreving fetchable feed')
+        future = self.query_pyagg('get', 'feeds/fetchable', kwargs)
+        future.add_done_callback(self.callback)
diff --git a/src/manager.py b/src/manager.py
index f7240670..781d742b 100755
--- a/src/manager.py
+++ b/src/manager.py
@@ -32,7 +32,7 @@ def db_create():
 @manager.command
 def fetch(limit=100, retreive_all=False):
     "Crawl the feeds with the client crawler."
-    from web.lib.crawler import CrawlerScheduler
+    from crawler.http_crawler import CrawlerScheduler
     scheduler = CrawlerScheduler(conf.API_LOGIN, conf.API_PASSWD)
     scheduler.run(limit=limit, retreive_all=retreive_all)
     scheduler.wait()
@@ -47,7 +47,7 @@ def fetch_asyncio(user_id, feed_id):
         populate_g()
         from flask import g
         from web.models import User
-        import crawler
+        from crawler import classic_crawler
         users = []
         try:
             users = User.query.filter(User.id == int(user_id)).all()
@@ -67,7 +67,7 @@ def fetch_asyncio(user_id, feed_id):
             if user.activation_key == "":
                 print("Fetching articles for " + user.nickname)
                 g.user = user
-                crawler.retrieve_feed(loop, g.user, feed_id)
+                classic_crawler.retrieve_feed(loop, g.user, feed_id)
         loop.close()
 
 from scripts.probes import ArticleProbe, FeedProbe
diff --git a/src/web/lib/crawler.py b/src/web/lib/crawler.py
deleted file mode 100644
index f480fe96..00000000
--- a/src/web/lib/crawler.py
+++ /dev/null
@@ -1,251 +0,0 @@
-"""
-Here's a sum up on how it works :
-
-CrawlerScheduler.run
-    will retreive a list of feeds to be refreshed and pass result to
-CrawlerScheduler.callback
-    which will retreive each feed and treat result with
-FeedCrawler.callback
-    which will interprete the result (status_code, etag) collect ids
-    and match them agaisnt pyagg which will cause
-PyAggUpdater.callback
-    to create the missing entries
-"""
-
-import time
-import conf
-import json
-import logging
-import feedparser
-from datetime import datetime, timedelta
-from time import strftime, gmtime
-from concurrent.futures import ThreadPoolExecutor
-from requests_futures.sessions import FuturesSession
-from web.lib.utils import default_handler, to_hash
-from web.lib.feed_utils import construct_feed_from
-from web.lib.article_utils import extract_id, construct_article
-
-logger = logging.getLogger(__name__)
-logging.captureWarnings(True)
-API_ROOT = "api/v2.0/"
-
-
-class AbstractCrawler:
-
-    def __init__(self, auth, pool=None, session=None):
-        self.auth = auth
-        self.pool = pool or ThreadPoolExecutor(max_workers=conf.NB_WORKER)
-        self.session = session or FuturesSession(executor=self.pool)
-        self.session.verify = False
-        self.url = conf.PLATFORM_URL
-
-    def query_pyagg(self, method, urn, data=None):
-        """A wrapper for internal call, method should be ones you can find
-        on requests (header, post, get, options, ...), urn the distant
-        resources you want to access on pyagg, and data, the data you wanna
-        transmit."""
-        if data is None:
-            data = {}
-        method = getattr(self.session, method)
-        return method("%s%s%s" % (self.url, API_ROOT, urn),
-                      auth=self.auth, data=json.dumps(data,
-                                                      default=default_handler),
-                      headers={'Content-Type': 'application/json',
-                               'User-Agent': conf.USER_AGENT})
-
-    def wait(self, max_wait=300, checks=5, wait_for=2):
-        checked, second_waited = 0, 0
-        while True:
-            time.sleep(wait_for)
-            second_waited += wait_for
-            if second_waited > max_wait:
-                logger.warn('Exiting after %d seconds', second_waited)
-                break
-            if self.pool._work_queue.qsize():
-                checked = 0
-                continue
-            checked += 1
-            if checked == checks:
-                break
-
-
-class PyAggUpdater(AbstractCrawler):
-
-    def __init__(self, feed, entries, headers, parsed_feed,
-                 auth, pool=None, session=None):
-        self.feed = feed
-        self.entries = entries
-        self.headers = headers
-        self.parsed_feed = parsed_feed
-        super().__init__(auth, pool, session)
-
-    def callback(self, response):
-        """Will process the result from the challenge, creating missing article
-        and updating the feed"""
-        article_created = False
-        if response.result().status_code != 204:
-            results = response.result().json()
-            logger.debug('%r %r - %d entries were not matched '
-                         'and will be created',
-                         self.feed['id'], self.feed['title'], len(results))
-            for id_to_create in results:
-                article_created = True
-                entry = construct_article(
-                        self.entries[tuple(sorted(id_to_create.items()))],
-                        self.feed)
-                logger.info('%r %r - creating %r for %r - %r', self.feed['id'],
-                            self.feed['title'], entry['title'],
-                            entry['user_id'], id_to_create)
-                self.query_pyagg('post', 'article', entry)
-
-        logger.debug('%r %r - updating feed etag %r last_mod %r',
-                     self.feed['id'], self.feed['title'],
-                     self.headers.get('etag', ''),
-                     self.headers.get('last-modified', ''))
-
-        up_feed = {'error_count': 0, 'last_error': None,
-                   'etag': self.headers.get('etag', ''),
-                   'last_modified': self.headers.get('last-modified',
-                                    strftime('%a, %d %b %Y %X %Z', gmtime()))}
-        fresh_feed = construct_feed_from(url=self.feed['link'],
-                                         fp_parsed=self.parsed_feed)
-        for key in ('description', 'site_link', 'icon_url'):
-            if fresh_feed.get(key) and fresh_feed[key] != self.feed.get(key):
-                up_feed[key] = fresh_feed[key]
-        if not self.feed.get('title'):
-            up_feed['title'] = fresh_feed.get('title', '')
-        up_feed['user_id'] = self.feed['user_id']
-        # re-getting that feed earlier since new entries appeared
-        if article_created:
-            up_feed['last_retrieved'] \
-                    = (datetime.now() - timedelta(minutes=45)).isoformat()
-
-        diff_keys = {key for key in up_feed
-                     if up_feed[key] != self.feed.get(key)}
-        if not diff_keys:
-            return  # no change in the feed, no update
-        if not article_created and diff_keys == {'last_modified', 'etag'}:
-            return  # meaningless if no new article has been published
-        logger.info('%r %r - pushing feed attrs %r',
-                self.feed['id'], self.feed['title'],
-                {key: "%s -> %s" % (up_feed[key], self.feed.get(key))
-                 for key in up_feed if up_feed[key] != self.feed.get(key)})
-
-        self.query_pyagg('put', 'feed/%d' % self.feed['id'], up_feed)
-
-
-class FeedCrawler(AbstractCrawler):
-
-    def __init__(self, feed, auth, pool=None, session=None):
-        self.feed = feed
-        super().__init__(auth, pool, session)
-
-    def clean_feed(self):
-        """Will reset the errors counters on a feed that have known errors"""
-        if self.feed.get('error_count') or self.feed.get('last_error'):
-            self.query_pyagg('put', 'feed/%d' % self.feed['id'],
-                             {'error_count': 0, 'last_error': ''})
-
-    def callback(self, response):
-        """will fetch the feed and interprete results (304, etag) or will
-        challenge pyagg to compare gotten entries with existing ones"""
-        try:
-            response = response.result()
-            response.raise_for_status()
-        except Exception as error:
-            error_count = self.feed['error_count'] + 1
-            logger.exception('%r %r - an error occured while fetching '
-                    'feed; bumping  error count to %r',
-                    self.feed['id'], self.feed['title'], error_count)
-            future = self.query_pyagg('put', 'feed/%d' % self.feed['id'],
-                                      {'error_count': error_count,
-                                       'last_error': str(error),
-                                       'user_id': self.feed['user_id']})
-            return
-
-        if response.status_code == 304:
-            logger.info("%r %r - feed responded with 304",
-                        self.feed['id'], self.feed['title'])
-            self.clean_feed()
-            return
-        if 'etag' not in response.headers:
-            logger.debug('%r %r - manually generating etag',
-                         self.feed['id'], self.feed['title'])
-            response.headers['etag'] = 'pyagg/"%s"' % to_hash(response.text)
-        if response.headers['etag'] and self.feed['etag'] \
-                and response.headers['etag'] == self.feed['etag']:
-            if 'pyagg' in self.feed['etag']:
-                logger.info("%r %r - calculated hash matches (%d)",
-                            self.feed['id'], self.feed['title'],
-                            response.status_code)
-            else:
-                logger.info("%r %r - feed responded with same etag (%d)",
-                            self.feed['id'], self.feed['title'],
-                            response.status_code)
-            self.clean_feed()
-            return
-        else:
-            logger.debug('%r %r - etag mismatch %r != %r',
-                         self.feed['id'], self.feed['title'],
-                         response.headers['etag'], self.feed['etag'])
-        logger.info('%r %r - cache validation failed, challenging entries',
-                    self.feed['id'], self.feed['title'])
-
-        ids, entries = [], {}
-        parsed_response = feedparser.parse(response.content)
-        for entry in parsed_response['entries']:
-            entry_ids = extract_id(entry)
-            entry_ids['feed_id'] = self.feed['id']
-            entry_ids['user_id'] = self.feed['user_id']
-            entries[tuple(sorted(entry_ids.items()))] = entry
-            ids.append(entry_ids)
-        logger.debug('%r %r - found %d entries %r',
-                     self.feed['id'], self.feed['title'], len(ids), ids)
-        future = self.query_pyagg('get', 'articles/challenge', {'ids': ids})
-        updater = PyAggUpdater(self.feed, entries, response.headers,
-                               parsed_response,
-                               self.auth, self.pool, self.session)
-        future.add_done_callback(updater.callback)
-
-
-class CrawlerScheduler(AbstractCrawler):
-
-    def __init__(self, username, password, pool=None, session=None):
-        self.auth = (username, password)
-        super(CrawlerScheduler, self).__init__(self.auth, pool, session)
-
-    def prepare_headers(self, feed):
-        """For a known feed, will construct some header dictionnary"""
-        headers = {'User-Agent': conf.USER_AGENT}
-        if feed.get('last_modified'):
-            headers['If-Modified-Since'] = feed['last_modified']
-        if feed.get('etag') and 'pyagg' not in feed['etag']:
-            headers['If-None-Match'] = feed['etag']
-        logger.debug('%r %r - calculated headers %r',
-                     feed['id'], feed['title'], headers)
-        return headers
-
-    def callback(self, response):
-        """processes feeds that need to be fetched"""
-        response = response.result()
-        response.raise_for_status()
-        if response.status_code == 204:
-            logger.debug("No feed to fetch")
-            return
-        feeds = response.json()
-        logger.debug('%d to fetch %r', len(feeds), feeds)
-        for feed in feeds:
-            logger.debug('%r %r - fetching resources',
-                         feed['id'], feed['title'])
-            future = self.session.get(feed['link'],
-                                      headers=self.prepare_headers(feed))
-
-            feed_crwlr = FeedCrawler(feed, self.auth, self.pool, self.session)
-            future.add_done_callback(feed_crwlr.callback)
-
-    def run(self, **kwargs):
-        """entry point, will retreive feeds to be fetch
-        and launch the whole thing"""
-        logger.debug('retreving fetchable feed')
-        future = self.query_pyagg('get', 'feeds/fetchable', kwargs)
-        future.add_done_callback(self.callback)
diff --git a/src/web/utils.py b/src/web/utils.py
index fcd791e8..1d4b30ab 100755
--- a/src/web/utils.py
+++ b/src/web/utils.py
@@ -109,8 +109,8 @@ def fetch(id, feed_id=None):
     Fetch the feeds in a new processus.
     The "asyncio" crawler is launched with the manager.
     """
-    cmd = [sys.executable, conf.BASE_DIR+'/manager.py', 'fetch_asyncio', str(id),
-            str(feed_id)]
+    cmd = [sys.executable, conf.BASE_DIR + '/manager.py', 'fetch_asyncio',
+           str(id), str(feed_id)]
     return subprocess.Popen(cmd, stdout=subprocess.PIPE)
 
 def history(user_id, year=None, month=None):
-- 
cgit