#! /usr/bin/env python #-*- coding: utf-8 -*- # pyAggr3g470r - A Web based news aggregator. # Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/ # # For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/ # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see __author__ = "Cedric Bonhomme" __version__ = "$Revision: 0.3 $" __date__ = "$Date: 2011/10/24 $" __revision__ = "$Date: 2013/01/18 $" __copyright__ = "Copyright (c) Cedric Bonhomme" __license__ = "GPLv3" # # This file contains the export functions of pyAggr3g470r. Indeed # it is possible to export the database of articles in different formats: # - simple HTML webzine; # - text file; # - ePub file; # - PDF file. # import os import conf import utils def HTML_HEADER(title="pyAggr3g470r"): return """ %s """ % (title,) HTML_FOOTER = """

This archive has been generated with pyAggr3g470r. A software under GPLv3 license. You are welcome to copy, modify or redistribute the source code according to the GPLv3 license.

""" def export_html(mongo_db): """ Export the articles given in parameter in a simple Webzine. """ nb_articles = format(mongo_db.nb_articles(), ",d") feeds = mongo_db.get_all_feeds() index = HTML_HEADER("News archive") index += "

List of feeds

\n" index += """

%s articles.

Articles of the feed %s

%s articles.

\n' a_post += """

%s

\n
""" % \ (article["article_link"], article["article_title"]) a_post += article["article_content"] a_post += "

Complete story

\n
\n" index += HTML_FOOTER with open(conf.path + "/var/export/webzine/" + "index.html", "w") as f: f.write(index) def export_txt(mongo_db): """ Export the articles given in parameter in text files. """ feeds = mongo_db.get_all_feeds() for feed in feeds: # creates folder for each stream folder = conf.path + "/var/export/txt/" + \ utils.normalize_filename(feed["feed_title"].strip().replace(':', '').lower()) try: os.makedirs(folder) except OSError: # directories already exists (not a problem) pass for article in mongo_db.get_articles(feed_id=feed["feed_id"]): name = article["article_date"].ctime().strip().replace(' ', '_') name = os.path.normpath(folder + "/" + name + ".txt") content = "Title: " + article["article_title"] + "\n\n\n" content += utils.clear_string(article["article_content"]) with open(name, "w") as f: f.write(content) def export_epub(mongo_db): """ Export the articles given in parameter in ePub files. """ from epub import ez_epub feeds = mongo_db.get_all_feeds() for feed in feeds: # creates folder for each stream folder = conf.path + "/var/export/epub/" + \ utils.normalize_filename(feed["feed_title"].strip().replace(':', '').lower().encode('utf-8')) try: os.makedirs(folder) except OSError: # directories already exists (not a problem) pass for article in mongo_db.get_articles(feed_id=feed["feed_id"]): name = article["article_date"].ctime().strip().replace(' ', '_') name = os.path.normpath(folder + "/" + name + ".epub") section = ez_epub.Section() section.title = article["article_title"] section.paragraphs = [utils.clear_string(article["article_content"])] ez_epub.makeBook(article["article_title"], [feed["feed_title"]], [section], \ name, lang='en-US', cover=None) def export_pdf(feeds): """ Export the articles given in parameter in PDF files. """ from xhtml2pdf import pisa import io as StringIO for feed in list(feeds.values()): # creates folder for each stream folder = utils.path + "/var/export/pdf/" + \ utils.normalize_filename(feed.feed_title.strip().replace(':', '').lower()) try: os.makedirs(folder) except OSError: # directories already exists (not a problem) pass for article in list(feed.articles.values()): name = article.article_date.strip().replace(' ', '_') name = os.path.normpath(folder + "/" + name + ".pdf") content = HTML_HEADER(article.article_title) content += '\n

\n' content += """

%s

""" % \ (article.article_link, article.article_title) content += article.article_description content += "

\n" content += HTML_FOOTER try: pdf = pisa.CreatePDF(StringIO.StringIO(content), file(name, "wb")) except: pass