diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2013-12-08 01:58:06 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2013-12-08 01:58:06 +0100 |
commit | 2bcde34af4707e2644be45ae9e1a075f445113f0 (patch) | |
tree | 938f1145a7574585bf5ed6b678e6251671b4c797 | |
parent | Updated README (diff) | |
download | newspipe-2bcde34af4707e2644be45ae9e1a075f445113f0.tar.gz newspipe-2bcde34af4707e2644be45ae9e1a075f445113f0.tar.bz2 newspipe-2bcde34af4707e2644be45ae9e1a075f445113f0.zip |
It is now possible to export all articles as a simple HTML site. The result is returned as a gzip-compressed tar archive (.tar.gz).
-rw-r--r-- | pyaggr3g470r/export.py | 220 | ||||
-rw-r--r-- | pyaggr3g470r/templates/management.html | 4 | ||||
-rw-r--r-- | pyaggr3g470r/views.py | 19 |
3 files changed, 242 insertions, 1 deletions
diff --git a/pyaggr3g470r/export.py b/pyaggr3g470r/export.py new file mode 100644 index 00000000..e220bde7 --- /dev/null +++ b/pyaggr3g470r/export.py @@ -0,0 +1,220 @@ +#! /usr/bin/env python +#-*- coding: utf-8 -*- + +# pyAggr3g470r - A Web based news aggregator. +# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/ +# +# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/ +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/> + +__author__ = "Cedric Bonhomme" +__version__ = "$Revision: 0.5 $" +__date__ = "$Date: 2011/10/24 $" +__revision__ = "$Date: 2013/12/07 $" +__copyright__ = "Copyright (c) Cedric Bonhomme" +__license__ = "GPLv3" + +# +# This file contains the export functions of pyAggr3g470r. Indeed +# it is possible to export the database of articles in different formats: +# - simple HTML webzine; +# - text file. +# + +import os +import time +import tarfile + +import conf +import utils +import models + +def HTML_HEADER(title="pyAggr3g470r", css="./style.css"): + return """<!DOCTYPE html> +<html lang="en-US"> +<head> +<title>%s</title> +<meta charset="utf-8"/> +<link rel="stylesheet" href="%s" /> +</head> +<body>""" % (title, css) + +HTML_FOOTER = """<hr /> +<p>This archive has been generated with +<a href="https://bitbucket.org/cedricbonhomme/pyaggr3g470r/">pyAggr3g470r</a>. +A software under GPLv3 license. 
+You are welcome to copy, modify or redistribute the source code according to the +<a href="http://www.gnu.org/licenses/gpl-3.0.txt">GPLv3</a> license.</p> +</body> +</html> +""" + +CSS = """body { + font:normal medium 'Gill Sans','Gill Sans MT',Verdana,sans-serif; + margin:1.20em auto; + width:80%; + line-height:1.75; +} +blockquote { + font-size:small; + line-height:2.153846; + margin:2.153846em 0; + padding:0;font-style:oblique; + border-left:1px dotted; + margin-left:2.153846em; + padding-left:2.153846em; +} +blockquote p{ + margin:2.153846em 0; +} +p+br { + display:none; +} +h1 { +font-size:large; +} +h2,h3 { + font-size:medium; +} +hr { + border-style:dotted; + height:1px; + border-width: 1px 0 0 0; + margin:1.45em 0 1.4em; + padding:0; +} +a { + text-decoration:none; + color:#00008B; +} +#footer { + clear:both; + text-align:center; + font-size:small; +} +img { + border:0; +} +.horizontal,.simple li { + margin:0; + padding:0; + list-style:none; + display:inline +} +.simple li:before { + content:"+ "; +} +.simple > li:first-child:before { + content:""; +} +.author { + text-decoration:none; + display:block; + float:right; + margin-left:2em; + font-size:small; +} +.content { + margin:1.00em 1.00em; +}""" + +def export_html(feeds): + """ + Export the articles given in parameter in a simple Webzine. 
+ """ + #tar = tarfile.open(conf.PATH + "/pyaggr3g470r/var/export.tar.gz", "w:gz") + nb_articles = format(len(models.Article.objects()), ",d") + index = HTML_HEADER("News archive") + index += "<h1>List of feeds</h1>\n" + index += """<p>%s articles.</p>\n<ul>\n""" % (nb_articles,) + for feed in feeds: + # creates a folder for each stream + feed_folder = conf.PATH + "/pyaggr3g470r/var/export/webzine/" + str(feed.oid) + try: + os.makedirs(feed_folder) + except OSError: + # directories already exists (not a problem) + pass + + index += """ <li><a href="%s">%s</a></li>\n""" % (feed.oid, feed.title) + + posts = HTML_HEADER(feed.title, "../style.css") + posts += """<h1>Articles of the feed <a href="%s">%s</a></h1>\n""" % (feed.site_link, feed.title) + posts += """<p>%s articles.</p>\n""" % (format(len(feed.articles), ",d"),) + + for article in feed.articles: + + post_file_name = os.path.normpath(feed_folder + "/" + str(article.id) + ".html") + feed_index = os.path.normpath(feed_folder + "/index.html") + + posts += article.date.ctime() + " - " + """<a href="./%s.html">%s</a>""" % \ + (article.id, article.title[:150]) + "<br />\n" + + a_post = HTML_HEADER(article.title, "../style.css") + a_post += '<div style="width:60%; overflow:hidden; text-align:justify; margin:0 auto">\n' + a_post += """<h1><a href="%s">%s</a></h1>\n<br />""" % \ + (article.link, article.title) + a_post += article.content + a_post += "</div>\n<hr />\n" + a_post += """<br />\n<a href="%s">Complete story</a>\n<br />\n""" % (article.link,) + a_post += HTML_FOOTER + + with open(post_file_name, "w") as f: + f.write(a_post.encode("utf-8")) + #tar.add(post_file_name) + + posts += HTML_FOOTER + with open(feed_index, "w") as f: + f.write(posts.encode("utf-8")) + #tar.add(feed_index) + + index += "</ul>\n" + index += "<p>" + time.strftime("Generated on %d %b %Y at %H:%M.") + "</p>\n" + index += HTML_FOOTER + with open(conf.PATH + "/pyaggr3g470r/var/export/webzine/" + "index.html", "w") as f: + 
f.write(index.encode("utf-8")) + #tar.add(conf.PATH + "/pyaggr3g470r/var/export/webzine/" + "index.html") + with open(conf.PATH + "/pyaggr3g470r/var/export/webzine/" + "style.css", "w") as f: + f.write(CSS.encode("utf-8")) + #tar.add(conf.PATH + "/pyaggr3g470r/var/export/webzine/" + "style.css") + #tar.close() + + with tarfile.open(conf.PATH + "/pyaggr3g470r/var/export.tar.gz", "w:gz") as tar: + tar.add(conf.PATH + "/pyaggr3g470r/var/export/webzine/", arcname=os.path.basename(conf.PATH + "/pyaggr3g470r/var/export/webzine/")) + +def export_txt(mongo_db): + """ + Export the articles given in parameter in text files. + """ + feeds = mongo_db.get_all_feeds() + for feed in feeds: + # creates folder for each stream + folder = conf.PATH + "/var/export/txt/" + \ + utils.normalize_filename(feed["feed_title"].strip().replace(':', '').lower()) + try: + os.makedirs(folder) + except OSError: + # directories already exists (not a problem) + pass + + for article in mongo_db.get_articles(feed_id=feed["feed_id"]): + name = article["article_date"].ctime().strip().replace(' ', '_') + name = os.path.normpath(folder + "/" + name + ".txt") + + content = "Title: " + article["article_title"] + "\n\n\n" + content += utils.clear_string(article["article_content"]) + + with open(name, "w") as f: + f.write(content)
\ No newline at end of file diff --git a/pyaggr3g470r/templates/management.html b/pyaggr3g470r/templates/management.html index 5101590b..efa649f0 100644 --- a/pyaggr3g470r/templates/management.html +++ b/pyaggr3g470r/templates/management.html @@ -11,5 +11,9 @@ <h1>Your Profile</h1> <p>Update your <a href="/profile/">profile</a>.</p> </div> + <div class="jumbotron"> + <h1>Export</h1> + <a href="/export/" class="btn btn-default">HTML</a> + </div> </div><!-- /.container --> {% endblock %}
\ No newline at end of file diff --git a/pyaggr3g470r/views.py b/pyaggr3g470r/views.py index 48e0e49d..a458c3eb 100644 --- a/pyaggr3g470r/views.py +++ b/pyaggr3g470r/views.py @@ -28,7 +28,7 @@ __license__ = "GPLv3" import datetime -from flask import render_template, request, flash, session, url_for, redirect, g +from flask import render_template, request, make_response, flash, session, url_for, redirect, g from wtforms import TextField, PasswordField, SubmitField, validators from flask.ext.login import LoginManager, login_user, logout_user, login_required, current_user, AnonymousUserMixin from collections import defaultdict @@ -37,7 +37,9 @@ from forms import SigninForm, AddFeedForm, ProfileForm from pyaggr3g470r import app, db +import conf import utils +import export import feedgetter import models import search as fastsearch @@ -255,6 +257,21 @@ def index_database(): fastsearch.create_index(user.feeds) return redirect(url_for('home')) +@app.route('/export/', methods=['GET']) +@login_required +def export_articles(): + """ + Export all articles. + """ + user = models.User.objects(email=g.user.email).first() + export.export_html(user.feeds) + with open(conf.PATH + '/pyaggr3g470r/var/export.tar.gz', 'r') as export_file: + response = make_response(export_file.read()) + response.headers['Content-Type'] = 'application/x-compressed' + response.headers['Content-Disposition'] = 'attachment; filename=export.tar.gz' + return response + return redirect(url_for('management')) + @app.route('/search/', methods=['GET']) @login_required def search(): |