From 2bcde34af4707e2644be45ae9e1a075f445113f0 Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Sun, 8 Dec 2013 01:58:06 +0100 Subject: It is now possible to export all articles in a simple HTML site. The result is returned as a compressed tgz file. --- pyaggr3g470r/export.py | 220 +++++++++++++++++++++++++++++++++ pyaggr3g470r/templates/management.html | 4 + pyaggr3g470r/views.py | 19 ++- 3 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 pyaggr3g470r/export.py diff --git a/pyaggr3g470r/export.py b/pyaggr3g470r/export.py new file mode 100644 index 00000000..e220bde7 --- /dev/null +++ b/pyaggr3g470r/export.py @@ -0,0 +1,220 @@ +#! /usr/bin/env python +#-*- coding: utf-8 -*- + +# pyAggr3g470r - A Web based news aggregator. +# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/ +# +# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/ +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see + +__author__ = "Cedric Bonhomme" +__version__ = "$Revision: 0.5 $" +__date__ = "$Date: 2011/10/24 $" +__revision__ = "$Date: 2013/12/07 $" +__copyright__ = "Copyright (c) Cedric Bonhomme" +__license__ = "GPLv3" + +# +# This file contains the export functions of pyAggr3g470r. Indeed +# it is possible to export the database of articles in different formats: +# - simple HTML webzine; +# - text file. +# + +import os +import time +import tarfile + +import conf +import utils +import models + +def HTML_HEADER(title="pyAggr3g470r", css="./style.css"): + return """ + + +%s + + + +""" % (title, css) + +HTML_FOOTER = """
+

This archive has been generated with +pyAggr3g470r. +A software under GPLv3 license. +You are welcome to copy, modify or redistribute the source code according to the +GPLv3 license.

+ + +""" + +CSS = """body { + font:normal medium 'Gill Sans','Gill Sans MT',Verdana,sans-serif; + margin:1.20em auto; + width:80%; + line-height:1.75; +} +blockquote { + font-size:small; + line-height:2.153846; + margin:2.153846em 0; + padding:0;font-style:oblique; + border-left:1px dotted; + margin-left:2.153846em; + padding-left:2.153846em; +} +blockquote p{ + margin:2.153846em 0; +} +p+br { + display:none; +} +h1 { +font-size:large; +} +h2,h3 { + font-size:medium; +} +hr { + border-style:dotted; + height:1px; + border-width: 1px 0 0 0; + margin:1.45em 0 1.4em; + padding:0; +} +a { + text-decoration:none; + color:#00008B; +} +#footer { + clear:both; + text-align:center; + font-size:small; +} +img { + border:0; +} +.horizontal,.simple li { + margin:0; + padding:0; + list-style:none; + display:inline +} +.simple li:before { + content:"+ "; +} +.simple > li:first-child:before { + content:""; +} +.author { + text-decoration:none; + display:block; + float:right; + margin-left:2em; + font-size:small; +} +.content { + margin:1.00em 1.00em; +}""" + +def export_html(feeds): + """ + Export the articles given in parameter in a simple Webzine. + """ + #tar = tarfile.open(conf.PATH + "/pyaggr3g470r/var/export.tar.gz", "w:gz") + nb_articles = format(len(models.Article.objects()), ",d") + index = HTML_HEADER("News archive") + index += "

List of feeds

\n" + index += """

%s articles.

\n\n" + index += "

" + time.strftime("Generated on %d %b %Y at %H:%M.") + "

\n" + index += HTML_FOOTER + with open(conf.PATH + "/pyaggr3g470r/var/export/webzine/" + "index.html", "w") as f: + f.write(index.encode("utf-8")) + #tar.add(conf.PATH + "/pyaggr3g470r/var/export/webzine/" + "index.html") + with open(conf.PATH + "/pyaggr3g470r/var/export/webzine/" + "style.css", "w") as f: + f.write(CSS.encode("utf-8")) + #tar.add(conf.PATH + "/pyaggr3g470r/var/export/webzine/" + "style.css") + #tar.close() + + with tarfile.open(conf.PATH + "/pyaggr3g470r/var/export.tar.gz", "w:gz") as tar: + tar.add(conf.PATH + "/pyaggr3g470r/var/export/webzine/", arcname=os.path.basename(conf.PATH + "/pyaggr3g470r/var/export/webzine/")) + +def export_txt(mongo_db): + """ + Export the articles given in parameter in text files. + """ + feeds = mongo_db.get_all_feeds() + for feed in feeds: + # creates folder for each stream + folder = conf.PATH + "/var/export/txt/" + \ + utils.normalize_filename(feed["feed_title"].strip().replace(':', '').lower()) + try: + os.makedirs(folder) + except OSError: + # directories already exists (not a problem) + pass + + for article in mongo_db.get_articles(feed_id=feed["feed_id"]): + name = article["article_date"].ctime().strip().replace(' ', '_') + name = os.path.normpath(folder + "/" + name + ".txt") + + content = "Title: " + article["article_title"] + "\n\n\n" + content += utils.clear_string(article["article_content"]) + + with open(name, "w") as f: + f.write(content) \ No newline at end of file diff --git a/pyaggr3g470r/templates/management.html b/pyaggr3g470r/templates/management.html index 5101590b..efa649f0 100644 --- a/pyaggr3g470r/templates/management.html +++ b/pyaggr3g470r/templates/management.html @@ -11,5 +11,9 @@

Your Profile

Update your profile.

+
+

Export

+ HTML +
{% endblock %} \ No newline at end of file diff --git a/pyaggr3g470r/views.py b/pyaggr3g470r/views.py index 48e0e49d..a458c3eb 100644 --- a/pyaggr3g470r/views.py +++ b/pyaggr3g470r/views.py @@ -28,7 +28,7 @@ __license__ = "GPLv3" import datetime -from flask import render_template, request, flash, session, url_for, redirect, g +from flask import render_template, request, make_response, flash, session, url_for, redirect, g from wtforms import TextField, PasswordField, SubmitField, validators from flask.ext.login import LoginManager, login_user, logout_user, login_required, current_user, AnonymousUserMixin from collections import defaultdict @@ -37,7 +37,9 @@ from forms import SigninForm, AddFeedForm, ProfileForm from pyaggr3g470r import app, db +import conf import utils +import export import feedgetter import models import search as fastsearch @@ -255,6 +257,21 @@ def index_database(): fastsearch.create_index(user.feeds) return redirect(url_for('home')) +@app.route('/export/', methods=['GET']) +@login_required +def export_articles(): + """ + Export all articles. + """ + user = models.User.objects(email=g.user.email).first() + export.export_html(user.feeds) + with open(conf.PATH + '/pyaggr3g470r/var/export.tar.gz', 'r') as export_file: + response = make_response(export_file.read()) + response.headers['Content-Type'] = 'application/x-compressed' + response.headers['Content-Disposition'] = 'attachment; filename=export.tar.gz' + return response + return redirect(url_for('management')) + @app.route('/search/', methods=['GET']) @login_required def search(): -- cgit