#! /usr/bin/env python
#-*- coding: utf-8 -*-
# pyAggr3g470r - A Web based news aggregator.
# Copyright (C) 2010-2013 Cédric Bonhomme - http://cedricbonhomme.org/
#
# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
__author__ = "Cedric Bonhomme"
__version__ = "$Revision: 0.3 $"
__date__ = "$Date: 2011/10/24 $"
__revision__ = "$Date: 2013/01/18 $"
__copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "GPLv3"
#
# This file contains the export functions of pyAggr3g470r. The database
# of articles can be exported in several formats:
# - simple HTML webzine;
# - text file;
# - ePub file;
# - PDF file.
#
import os
import conf
import utils
def HTML_HEADER(title="pyAggr3g470r"):
    """
    Return the header of an exported page.

    The given title is substituted into the header template; it defaults
    to the name of the application.
    """
    template = """
%s
"""
    return template % (title,)
# Footer appended to the end of every exported HTML page (used by
# export_html and export_pdf). It credits pyAggr3g470r and its license.
HTML_FOOTER = """
This archive has been generated with
pyAggr3g470r.
A software under GPLv3 license.
You are welcome to copy, modify or redistribute the source code according to the
GPLv3 license.
"""
def export_html(mongo_db):
"""
Export the articles of the database as a simple Webzine.

An index page is assembled from HTML_HEADER("News archive"), the body
built below and HTML_FOOTER, then written to
conf.path + "/var/export/webzine/index.html".

mongo_db must provide nb_articles() and get_all_feeds().
"""
# The article count is rendered with a thousands separator (",d").
nb_articles = format(mongo_db.nb_articles(), ",d")
feeds = mongo_db.get_all_feeds()
index = HTML_HEADER("News archive")
# NOTE(review): the string literals below look truncated (quotes are opened
# but not closed on the same logical line, and neither nb_articles nor feeds
# is visibly used) -- the markup they contained seems to be missing and
# should be restored before relying on this function.
index += "List of feeds
\n"
index += """%s articles.
\n\n
\n"
index += HTML_FOOTER
with open(conf.path + "/var/export/webzine/" + "index.html", "w") as f:
f.write(index)
def export_txt(mongo_db):
    """
    Export the articles of the database in text files.

    One folder per feed is created under conf.path + "/var/export/txt/",
    named after the normalized, lower-cased feed title. Each article of
    the feed is written in that folder to a file named after the article
    date, containing the title followed by the cleaned article content.

    mongo_db must provide get_all_feeds() and get_articles(feed_id=...).

    Raises OSError when a feed folder cannot be created or a file cannot
    be written.
    """
    for feed in mongo_db.get_all_feeds():
        # creates a folder for each stream
        folder = conf.path + "/var/export/txt/" + \
            utils.normalize_filename(feed["feed_title"].strip().replace(':', '').lower())
        try:
            os.makedirs(folder)
        except OSError:
            # An already existing folder is not a problem; any other
            # failure (permissions, a file in the way...) must not be hidden.
            if not os.path.isdir(folder):
                raise

        for article in mongo_db.get_articles(feed_id=feed["feed_id"]):
            name = article["article_date"].ctime().strip().replace(' ', '_')
            name = os.path.normpath(folder + "/" + name + ".txt")
            content = "Title: " + article["article_title"] + "\n\n\n"
            content += utils.clear_string(article["article_content"])
            # Explicit UTF-8: articles routinely contain non-ASCII text and
            # the locale's default encoding may not be able to represent it.
            with open(name, "w", encoding="utf-8") as f:
                f.write(content)
def export_epub(mongo_db):
    """
    Export the articles of the database in ePub files.

    One folder per feed is created under conf.path + "/var/export/epub/",
    named after the normalized, lower-cased feed title. Each article of
    the feed becomes a single-section ePub book named after the article
    date, with the article title as book title and the feed title as author.

    mongo_db must provide get_all_feeds() and get_articles(feed_id=...).

    Raises OSError when a feed folder cannot be created.
    """
    # Imported here so that the ePub dependency is only required when this
    # export is actually used.
    from epub import ez_epub

    for feed in mongo_db.get_all_feeds():
        # creates a folder for each stream
        # The title is passed as text (not encoded bytes), exactly as the
        # text and PDF exports do, so it can be joined to the text path.
        folder = conf.path + "/var/export/epub/" + \
            utils.normalize_filename(feed["feed_title"].strip().replace(':', '').lower())
        try:
            os.makedirs(folder)
        except OSError:
            # An already existing folder is not a problem; any other
            # failure (permissions, a file in the way...) must not be hidden.
            if not os.path.isdir(folder):
                raise

        for article in mongo_db.get_articles(feed_id=feed["feed_id"]):
            name = article["article_date"].ctime().strip().replace(' ', '_')
            name = os.path.normpath(folder + "/" + name + ".epub")

            section = ez_epub.Section()
            section.title = article["article_title"]
            section.paragraphs = [utils.clear_string(article["article_content"])]
            ez_epub.makeBook(article["article_title"], [feed["feed_title"]],
                             [section], name, lang='en-US', cover=None)
def export_pdf(feeds):
"""
Export the articles given in parameter in PDF files.

feeds is a mapping whose values are feed objects exposing feed_title and
articles; articles is itself a mapping whose values expose article_date,
article_link, article_title and article_description.

One folder per feed is created under "/var/export/pdf/" and one PDF per
article is generated in it with xhtml2pdf. Failures of the PDF generation
are silently ignored.
"""
from xhtml2pdf import pisa
import io as StringIO
for feed in list(feeds.values()):
# creates folder for each stream
# NOTE(review): the base path is read from utils.path here while the other
# exports use conf.path -- confirm that utils really exposes path.
folder = utils.path + "/var/export/pdf/" + \
utils.normalize_filename(feed.feed_title.strip().replace(':', '').lower())
try:
os.makedirs(folder)
except OSError:
# directories already exists (not a problem)
pass
for article in list(feed.articles.values()):
# article_date is handled as a string here (strip/replace), unlike the
# other exports which call ctime() on it.
name = article.article_date.strip().replace(' ', '_')
name = os.path.normpath(folder + "/" + name + ".pdf")
content = HTML_HEADER(article.article_title)
content += '\n\n'
# NOTE(review): the format string below has no conversion specifier although
# two values are supplied, and the following literal is not terminated on
# its line -- the markup of both literals seems to be missing; restore it.
content += """
""" % \
(article.article_link, article.article_title)
content += article.article_description
content += "
\n
\n"
content += HTML_FOOTER
try:
# NOTE(review): file() is a Python 2 builtin; under Python 3 this raises
# NameError, which the bare except below would hide. The opened file is
# also never explicitly closed.
pdf = pisa.CreatePDF(StringIO.StringIO(content), file(name, "wb"))
except:
pass