#! /usr/bin/env python
#-*- coding: utf-8 -*-

# pyAggr3g470r - A Web based news aggregator.
# Copyright (C) 2010-2013  Cédric Bonhomme - http://cedricbonhomme.org/
#
# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>

__author__ = "Cedric Bonhomme"
__version__ = "$Revision: 0.5 $"
__date__ = "$Date: 2011/10/24 $"
__revision__ = "$Date: 2013/12/07 $"
__copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "GPLv3"

#
# This file contains the export functions of pyAggr3g470r. Indeed
# it is possible to export the database of articles in different formats:
# - simple HTML webzine;
# - text file.
#

import os
import time
import tarfile

import conf
import utils
import models


def HTML_HEADER(title="pyAggr3g470r", css="./style.css"):
    """
    Return the opening markup of an exported page.

    'title' becomes the page title and 'css' is the location of the style
    sheet, relative to the page being generated. The returned string leaves
    the <body> element open; HTML_FOOTER closes it.
    """
    # NOTE(review): the tags of this template were rebuilt around the two
    # placeholders the function has always been formatted with (title, css);
    # confirm the exact markup against a previously generated page.
    return """<!DOCTYPE html>
<html lang="en-US">
<head>
<title>%s</title>
<meta charset="utf-8"/>
<link rel="stylesheet" href="%s" />
</head>
<body>
""" % (title, css)


# Closing markup shared by every exported page (pairs with HTML_HEADER).
# NOTE(review): the wording is unchanged; the surrounding tags and links
# were rebuilt and should be confirmed against a previously generated page.
HTML_FOOTER = """<hr />
<p>This archive has been generated with
<a href="http://bitbucket.org/cedricbonhomme/pyaggr3g470r/">pyAggr3g470r</a>.
A software under GPLv3 license.
You are welcome to copy, modify or redistribute the source code according to the
<a href="http://www.gnu.org/licenses/gpl-3.0.html">GPLv3</a> license.</p>
</body>
</html>
"""

# Style sheet written next to the exported pages. The adjacent literals are
# concatenated by the parser, so the sheet is emitted as one single line.
CSS = (
    "body { font:normal medium 'Gill Sans','Gill Sans MT',Verdana,sans-serif; "
    "margin:1.20em auto; width:80%; line-height:1.75; } "
    "blockquote { font-size:small; line-height:2.153846; "
    "margin:2.153846em 0; padding:0;font-style:oblique; "
    "border-left:1px dotted; margin-left:2.153846em; "
    "padding-left:2.153846em; } "
    "blockquote p{ margin:2.153846em 0; } "
    "p+br { display:none; } "
    "h1 { font-size:large; } "
    "h2,h3 { font-size:medium; } "
    "hr { border-style:dotted; height:1px; border-width: 1px 0 0 0; "
    "margin:1.45em 0 1.4em; padding:0; } "
    "a { text-decoration:none; color:#00008B; } "
    "#footer { clear:both; text-align:center; font-size:small; } "
    "img { border:0; } "
    ".horizontal,.simple li { margin:0; padding:0; list-style:none; "
    "display:inline } "
    '.simple li:before { content:"+ "; } '
    '.simple > li:first-child:before { content:""; } '
    ".author { text-decoration:none; display:block; float:right; "
    "margin-left:2em; font-size:small; } "
    ".content { margin:1.00em 1.00em; }"
)


def _ensure_dir(path):
    """
    Create the directory 'path' (and its parents) unless it already exists.
    """
    try:
        os.makedirs(path)
    except OSError:
        # An existing directory is not a problem; anything else (permissions,
        # a regular file in the way, ...) must not be hidden.
        if not os.path.isdir(path):
            raise


def _write_utf8(path, text):
    """
    Write 'text' to the file 'path' encoded in UTF-8, replacing the file.
    """
    # Byte strings are written untouched; text is encoded explicitly so the
    # output does not depend on the interpreter's default encoding.
    data = text if isinstance(text, bytes) else text.encode("utf-8")
    # Binary mode: the payload is already encoded bytes.
    with open(path, "wb") as f:
        f.write(data)


def export_html(feeds):
    """
    Export the articles given in parameter in a simple Webzine.

    Writes index.html and style.css in the webzine export directory located
    under conf.PATH, then packs that directory into export.tar.gz.
    """
    webzine_dir = conf.PATH + "/pyaggr3g470r/var/export/webzine/"
    _ensure_dir(webzine_dir)

    # total number of articles, with thousands separators
    nb_articles = format(len(models.Article.objects()), ",d")

    index = HTML_HEADER("News archive")
    index += "<h1>List of feeds</h1>\n"
    index += """<p>%s articles.</p>\n<ul>\n""" % (nb_articles,)
    # TODO(review): the code generating one entry (and one page) per element
    # of 'feeds' belongs here. It is absent from the reviewed copy of this
    # file and has not been guessed; restore it from version control.
    index += "</ul>\n"
    index += "<p>" + time.strftime("Generated on %d %b %Y at %H:%M.") + "</p>\n"
    index += HTML_FOOTER

    _write_utf8(webzine_dir + "index.html", index)
    _write_utf8(webzine_dir + "style.css", CSS)

    with tarfile.open(conf.PATH + "/pyaggr3g470r/var/export.tar.gz", "w:gz") as tar:
        # NOTE(review): webzine_dir ends with a slash, so basename() is the
        # empty string and the files are stored at the root of the archive.
        # Kept as is; confirm whether a "webzine" top folder was intended.
        tar.add(webzine_dir, arcname=os.path.basename(webzine_dir))


def export_txt(mongo_db):
    """
    Export the articles of every feed in text files.

    'mongo_db' must provide get_all_feeds() and get_articles(feed_id=...).
    One folder is created per feed and one UTF-8 text file per article,
    named after the date of the article.
    """
    # NOTE(review): this root differs from the one used by export_html
    # ("/pyaggr3g470r/var/export/..."); kept unchanged, confirm it is wanted.
    for feed in mongo_db.get_all_feeds():
        # creates folder for each stream
        folder = conf.PATH + "/var/export/txt/" + \
            utils.normalize_filename(feed["feed_title"].strip().replace(':', '').lower())
        _ensure_dir(folder)

        for article in mongo_db.get_articles(feed_id=feed["feed_id"]):
            name = article["article_date"].ctime().strip().replace(' ', '_')
            name = os.path.normpath(folder + "/" + name + ".txt")

            content = "Title: " + article["article_title"] + "\n\n\n"
            content += utils.clear_string(article["article_content"])

            _write_utf8(name, content)