aboutsummaryrefslogtreecommitdiff
path: root/epub
diff options
context:
space:
mode:
author: cedricbonhomme <devnull@localhost>, 2010-11-23 22:13:32 +0100
committer: cedricbonhomme <devnull@localhost>, 2010-11-23 22:13:32 +0100
commit: 0ce90a043eef532a71952e6aac43e6be6affc0f0 (patch)
tree: 1adaa7e3ff72891223705a0bafa978f74d3d3984 /epub
parent: Improvement of the search results (ToolTips, display). (diff)
download: newspipe-0ce90a043eef532a71952e6aac43e6be6affc0f0.tar.gz
newspipe-0ce90a043eef532a71952e6aac43e6be6affc0f0.tar.bz2
newspipe-0ce90a043eef532a71952e6aac43e6be6affc0f0.zip
Added import to EPUB function.
Diffstat (limited to 'epub')
-rw-r--r--  epub/__init__.py  1
-rw-r--r--  epub/epub.py  354
-rw-r--r--  epub/ez_epub.py  38
-rw-r--r--  epub/templates/container.xml  6
-rw-r--r--  epub/templates/content.opf  34
-rw-r--r--  epub/templates/ez-section.html  17
-rw-r--r--  epub/templates/image.html  16
-rw-r--r--  epub/templates/title-page.html  22
-rw-r--r--  epub/templates/toc.html  32
-rw-r--r--  epub/templates/toc.ncx  28
10 files changed, 548 insertions, 0 deletions
diff --git a/epub/__init__.py b/epub/__init__.py
new file mode 100644
index 00000000..8d1c8b69
--- /dev/null
+++ b/epub/__init__.py
@@ -0,0 +1 @@
+
diff --git a/epub/epub.py b/epub/epub.py
new file mode 100644
index 00000000..834acee6
--- /dev/null
+++ b/epub/epub.py
@@ -0,0 +1,354 @@
+#! /usr/local/bin/python
+#-*- coding: utf-8 -*-
+
+import itertools
+import mimetypes
+import os
+import shutil
+import subprocess
+import uuid
+import zipfile
+from genshi.template import TemplateLoader
+from lxml import etree
+
class TocMapNode:
    """One entry of the table-of-contents tree.

    Each node carries a title, the href it points to, its depth in the tree,
    its child nodes, and a sequential ``playOrder`` number that is filled in
    by :meth:`assignPlayOrder`.
    """

    def __init__(self):
        self.playOrder = 0
        self.title = ''
        self.href = ''
        self.children = []
        self.depth = 0

    def assignPlayOrder(self):
        """Number this node and all its descendants in pre-order, from 0."""
        # A one-element list acts as a mutable counter shared by the walk.
        counter = [0]
        self.__assignPlayOrder(counter)

    def __assignPlayOrder(self, nextPlayOrder):
        """Walk the subtree rooted here, taking numbers from ``nextPlayOrder[0]``."""
        pending = [self]
        while pending:
            node = pending.pop()
            node.playOrder = nextPlayOrder[0]
            nextPlayOrder[0] = node.playOrder + 1
            # Push children reversed so the first child is visited first,
            # which keeps the numbering in document (pre-order) sequence.
            pending.extend(reversed(node.children))
+
+
class EpubItem:
    """A single file of the book: an image, an HTML page or a stylesheet."""

    def __init__(self):
        # id:       identifier string for the item
        # srcPath:  file to copy from when the item has no inline html
        # destPath: path of the item inside the book's OEBPS directory
        # mimeType: media type of the item
        # html:     inline page content; empty for items copied from srcPath
        self.id = self.srcPath = self.destPath = self.mimeType = self.html = ''
+
+
class EpubBook:
    """Collects the parts of an EPUB and writes them to disk.

    Typical use: set the metadata (``setTitle``, ``addCreator``, ``addMeta``),
    register content (``addHtml``, ``addImage``, ``addCss``), describe the
    reading order and navigation (``addSpineItem``, ``addTocMapNode``), then
    call ``createBook(rootDir)`` to write the unpacked book and
    ``createArchive(rootDir, outputPath)`` to zip it.

    Duplicate registrations are guarded with ``assert`` statements, so those
    checks disappear when Python runs with ``-O``.
    """

    def __init__(self):
        # NOTE: the template directory is resolved against the current
        # working directory at load time.
        self.loader = TemplateLoader('./epub/templates')

        self.rootDir = ''
        self.UUID = uuid.uuid1()

        self.lang = 'en-US'
        self.title = ''
        self.creators = []   # list of (name, role)
        self.metaInfo = []   # list of (metaName, metaValue, metaAttrs)

        # Registered items, each dict keyed by the item's destPath.
        self.imageItems = {}
        self.htmlItems = {}
        self.cssItems = {}

        self.coverImage = None
        self.titlePage = None
        self.tocPage = None

        self.spine = []      # list of (order, item, linear)
        self.guide = {}      # type -> (href, title, type)
        self.tocMapRoot = TocMapNode()
        # Most recently added TOC node at each depth; depth 0 is the root.
        self.lastNodeAtDepth = {0: self.tocMapRoot}

    # ------------------------------------------------------------ metadata

    def setTitle(self, title):
        """Set the book title."""
        self.title = title

    def setLang(self, lang):
        """Set the book language code (default ``'en-US'``)."""
        self.lang = lang

    def addCreator(self, name, role='aut'):
        """Record a creator as a ``(name, role)`` pair."""
        self.creators.append((name, role))

    def addMeta(self, metaName, metaValue, **metaAttrs):
        """Record an extra ``dc:<metaName>`` element with optional attributes."""
        self.metaInfo.append((metaName, metaValue, metaAttrs))

    def getMetaTags(self):
        """Return ``(beginTag, value, endTag)`` triples for every ``addMeta`` entry.

        ``beginTag`` is ready-made markup, so attribute values are XML-escaped
        and quoted here; ``value`` is returned untouched for the caller to
        escape when rendering.
        """
        # Local import: the file-level import block does not provide it.
        from xml.sax.saxutils import quoteattr

        tags = []
        for metaName, metaValue, metaAttrs in self.metaInfo:
            beginTag = '<dc:%s' % metaName
            for attrName, attrValue in metaAttrs.items():
                # '%s' % value turns non-string values into text first.
                beginTag += ' opf:%s=%s' % (attrName, quoteattr('%s' % attrValue))
            beginTag += '>'
            tags.append((beginTag, metaValue, '</dc:%s>' % metaName))
        return tags

    # --------------------------------------------------------------- items

    def getImageItems(self):
        """Return the image items sorted by id."""
        return sorted(self.imageItems.values(), key=lambda item: item.id)

    def getHtmlItems(self):
        """Return the HTML items sorted by id."""
        return sorted(self.htmlItems.values(), key=lambda item: item.id)

    def getCssItems(self):
        """Return the CSS items sorted by id."""
        return sorted(self.cssItems.values(), key=lambda item: item.id)

    def getAllItems(self):
        """Return every image, HTML and CSS item, sorted by id."""
        everything = itertools.chain(self.imageItems.values(),
                                     self.htmlItems.values(),
                                     self.cssItems.values())
        return sorted(everything, key=lambda item: item.id)

    def addImage(self, srcPath, destPath):
        """Register the image at ``srcPath`` to be stored at ``destPath``.

        The media type is guessed from ``destPath`` and is ``None`` when the
        extension is unknown. Returns the new item.
        """
        item = EpubItem()
        item.id = 'image_%d' % (len(self.imageItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = mimetypes.guess_type(destPath)[0]
        assert item.destPath not in self.imageItems
        self.imageItems[destPath] = item
        return item

    def addHtmlForImage(self, imageItem):
        """Register an HTML page that displays ``imageItem``; return the page item."""
        html = self.__renderXhtml('image.html', book=self, item=imageItem)
        return self.addHtml('', '%s.html' % imageItem.destPath, html)

    def addHtml(self, srcPath, destPath, html):
        """Register an HTML page to be stored at ``destPath``.

        When ``html`` is non-empty it is written out as the page content;
        otherwise the page is copied from ``srcPath``. Returns the new item.
        """
        item = EpubItem()
        item.id = 'html_%d' % (len(self.htmlItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.html = html
        item.mimeType = 'application/xhtml+xml'
        assert item.destPath not in self.htmlItems
        self.htmlItems[item.destPath] = item
        return item

    def addCss(self, srcPath, destPath):
        """Register the stylesheet at ``srcPath`` to be stored at ``destPath``."""
        item = EpubItem()
        item.id = 'css_%d' % (len(self.cssItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = 'text/css'
        assert item.destPath not in self.cssItems
        self.cssItems[item.destPath] = item
        return item

    def addCover(self, srcPath):
        """Register ``srcPath`` as the cover image, stored as ``cover<ext>``.

        Only the image is registered; no cover page, spine entry or guide
        entry is created for it.
        """
        assert not self.coverImage
        _, ext = os.path.splitext(srcPath)
        self.coverImage = self.addImage(srcPath, 'cover%s' % ext)

    # ------------------------------------------------- title page / TOC page

    def __renderXhtml(self, templateName, **context):
        """Render ``templateName`` with ``context`` as an XHTML 1.1 document."""
        stream = self.loader.load(templateName).generate(**context)
        return stream.render('xhtml', doctype='xhtml11', drop_xml_decl=False)

    def __makeTitlePage(self):
        """Fill in the title page from its template unless html was supplied."""
        assert self.titlePage
        if self.titlePage.html:
            return
        self.titlePage.html = self.__renderXhtml('title-page.html', book=self)

    def addTitlePage(self, html=''):
        """Add a title page (spine order -200).

        With empty ``html`` the page is generated from the title-page
        template when the book is created.
        """
        assert not self.titlePage
        self.titlePage = self.addHtml('', 'title-page.html', html)
        self.addSpineItem(self.titlePage, True, -200)
        self.addGuideItem('title-page.html', 'Title Page', 'title-page')

    def __makeTocPage(self):
        """Generate the HTML table-of-contents page from its template."""
        assert self.tocPage
        self.tocPage.html = self.__renderXhtml('toc.html', book=self)

    def addTocPage(self):
        """Add a non-linear HTML table-of-contents page (spine order -100)."""
        assert not self.tocPage
        self.tocPage = self.addHtml('', 'toc.html', '')
        self.addSpineItem(self.tocPage, False, -100)
        self.addGuideItem('toc.html', 'Table of Contents', 'toc')

    # ------------------------------------------------------- spine / guide

    def getSpine(self):
        """Return the ``(order, item, linear)`` spine entries sorted by order.

        Sorting uses the order number only, so entries sharing an order keep
        their insertion sequence instead of falling back to comparing item
        objects (arbitrary on Python 2, a TypeError on Python 3).
        """
        return sorted(self.spine, key=lambda entry: entry[0])

    def addSpineItem(self, item, linear=True, order=None):
        """Append a registered HTML item to the reading order.

        ``order`` defaults to one more than the highest order used so far
        (1 when the spine is empty).
        """
        assert item.destPath in self.htmlItems
        if order is None:
            highest = max(entry[0] for entry in self.spine) if self.spine else 0
            order = highest + 1
        self.spine.append((order, item, linear))

    def getGuide(self):
        """Return the ``(href, title, type)`` guide entries sorted by type."""
        return sorted(self.guide.values(), key=lambda entry: entry[2])

    def addGuideItem(self, href, title, type):
        """Register a guide reference; each ``type`` may be used only once."""
        assert type not in self.guide
        self.guide[type] = (href, title, type)

    # ------------------------------------------------------------- TOC map

    def getTocMapRoot(self):
        """Return the (invisible) root node of the TOC tree."""
        return self.tocMapRoot

    def getTocMapHeight(self):
        """Return the greatest depth at which a TOC node has been added."""
        return max(self.lastNodeAtDepth)

    def addTocMapNode(self, href, title, depth=None, parent=None):
        """Add a TOC entry and return its node.

        The parent is, in order of precedence: the explicit ``parent``; the
        most recently added node at ``depth - 1``; or the root. Raises
        ``KeyError`` when ``depth`` is given but no node exists one level up.
        """
        node = TocMapNode()
        node.href = href
        node.title = title
        if parent is None:
            if depth is None:
                parent = self.tocMapRoot
            else:
                parent = self.lastNodeAtDepth[depth - 1]
        parent.children.append(node)
        node.depth = parent.depth + 1
        self.lastNodeAtDepth[node.depth] = node
        return node

    # ------------------------------------------------------------- writing

    @staticmethod
    def __ensureDir(path):
        """Create ``path`` if needed; re-raise unless the directory now exists."""
        try:
            os.makedirs(path)
        except OSError:
            # "Already exists" is expected; anything else (permissions, a
            # file in the way) must not be hidden.
            if not os.path.isdir(path):
                raise

    def makeDirs(self):
        """Create the ``META-INF`` and ``OEBPS`` directories under ``rootDir``."""
        for name in ('META-INF', 'OEBPS'):
            EpubBook.__ensureDir(os.path.join(self.rootDir, name))

    def __writeXml(self, templateName, destParts, **context):
        """Render ``templateName`` as XML into ``rootDir/<destParts...>``."""
        content = self.loader.load(templateName).generate(**context).render('xml')
        with open(os.path.join(self.rootDir, *destParts), 'w') as fout:
            fout.write(content)

    def __writeContainerXML(self):
        self.__writeXml('container.xml', ('META-INF', 'container.xml'))

    def __writeTocNCX(self):
        # Play-order numbers must be assigned before the template reads them.
        self.tocMapRoot.assignPlayOrder()
        self.__writeXml('toc.ncx', ('OEBPS', 'toc.ncx'), book=self)

    def __writeContentOPF(self):
        self.__writeXml('content.opf', ('OEBPS', 'content.opf'), book=self)

    def __writeItems(self):
        """Write every item into ``OEBPS``: inline html if any, else a file copy."""
        for item in self.getAllItems():
            # Same progress line as before, in a form valid on Python 2 and 3.
            print('%s %s' % (item.id, item.destPath))
            target = os.path.join(self.rootDir, 'OEBPS', item.destPath)
            # destPath may contain sub-directories (e.g. 'images/a.png').
            EpubBook.__ensureDir(os.path.dirname(target))
            if item.html:
                with open(target, 'w') as fout:
                    fout.write(item.html)
            else:
                shutil.copyfile(item.srcPath, target)

    def __writeMimeType(self):
        with open(os.path.join(self.rootDir, 'mimetype'), 'w') as fout:
            fout.write('application/epub+zip')

    @staticmethod
    def __listManifestItems(contentOPFPath):
        """Return the href of every manifest item in the given content.opf."""
        tree = etree.parse(contentOPFPath)
        return tree.xpath("//opf:manifest/opf:item/@href",
                          namespaces={'opf': 'http://www.idpf.org/2007/opf'})

    @staticmethod
    def createArchive(rootDir, outputPath):
        """Zip the unpacked book in ``rootDir`` into ``outputPath``.

        ``mimetype`` is added first and stored uncompressed; container.xml,
        content.opf and every manifest item follow, deflated. Files are
        addressed through ``rootDir`` with explicit archive names, so the
        process working directory is never changed, and the archive handle
        is closed even when a file is missing.
        """
        opfPath = os.path.join('OEBPS', 'content.opf')
        fileList = [os.path.join('META-INF', 'container.xml'), opfPath]
        for itemPath in EpubBook.__listManifestItems(os.path.join(rootDir, opfPath)):
            fileList.append(os.path.join('OEBPS', itemPath))

        fout = zipfile.ZipFile(outputPath, 'w')
        try:
            fout.write(os.path.join(rootDir, 'mimetype'), 'mimetype',
                       compress_type=zipfile.ZIP_STORED)
            for relPath in fileList:
                fout.write(os.path.join(rootDir, relPath), relPath,
                           compress_type=zipfile.ZIP_DEFLATED)
        finally:
            fout.close()

    @staticmethod
    def checkEpub(checkerPath, epubPath):
        """Run the Java checker jar on ``epubPath``; return its exit code.

        The argument list is executed directly. Combining a list with
        ``shell=True`` made POSIX shells run a bare ``java`` and drop the
        remaining arguments.
        """
        return subprocess.call(['java', '-jar', checkerPath, epubPath])

    def createBook(self, rootDir):
        """Write the unpacked book (mimetype, items, container, OPF, NCX) to ``rootDir``."""
        if self.titlePage:
            self.__makeTitlePage()
        if self.tocPage:
            self.__makeTocPage()
        self.rootDir = rootDir
        self.makeDirs()
        self.__writeMimeType()
        self.__writeItems()
        self.__writeContainerXML()
        self.__writeContentOPF()
        self.__writeTocNCX()
+
+
def test():
    """Build a small sample book and zip it (manual smoke test).

    The cover image and output directory are hard-coded Windows paths, so
    this only runs on a machine where they exist.
    """
    def getMinimalHtml(text):
        # Smallest XHTML page using `text` as both its title and its body.
        page = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHtml 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>%s</title></head>
<body><p>%s</p></body>
</html>
"""
        return page % (text, text)

    book = EpubBook()
    book.setTitle('Most Wanted Tips for Aspiring Young Pirates')
    for creator in ('Monkey D Luffy', 'Guybrush Threepwood'):
        book.addCreator(creator)
    book.addMeta('contributor', 'Smalltalk80', role = 'bkp')
    book.addMeta('date', '2010', event = 'publication')

    book.addTitlePage()
    book.addTocPage()
    book.addCover(r'D:\epub\blank.png')

    book.addCss(r'main.css', 'main.css')

    # One row per page: (heading text, TOC label, TOC depth).
    # A depth of None attaches the entry directly under the TOC root.
    outline = [
        ('Chapter 1', '1', None),
        ('Section 1.1', '1.1', 2),
        ('Subsection 1.1.1', '1.1.1', 3),
        ('Section 1.2', '1.2', 2),
        ('Chapter 2', '2', None),
    ]

    pages = []
    for number, (heading, _, _) in enumerate(outline, 1):
        pages.append(book.addHtml('', '%d.html' % number, getMinimalHtml(heading)))

    for page in pages:
        book.addSpineItem(page)

    # TOC entries are placed by depth here; addTocMapNode also accepts an
    # explicit `parent=` node as an alternative way to build the same tree.
    for page, (_, label, depth) in zip(pages, outline):
        book.addTocMapNode(page.destPath, label, depth)

    rootDir = r'd:\epub\test'
    book.createBook(rootDir)
    EpubBook.createArchive(rootDir, rootDir + '.epub')
    # Optional validation step, disabled:
    #EpubBook.checkEpub('epubcheck-1.0.5.jar', rootDir + '.epub')

if __name__ == '__main__':
    test()
diff --git a/epub/ez_epub.py b/epub/ez_epub.py
new file mode 100644
index 00000000..afd2dbff
--- /dev/null
+++ b/epub/ez_epub.py
@@ -0,0 +1,38 @@
+#! /usr/local/bin/python
+#-*- coding: utf-8 -*-
+
+import epub
+from genshi.template import TemplateLoader
+
class Section:
    """One section of a simple book: a title, its paragraphs and a TOC depth."""

    def __init__(self):
        # tocDepth 1 places the section at the top level of the table of contents.
        self.tocDepth = 1
        self.paragraphs = []
        self.title = ''
+
def makeBook(title, authors, sections, outputDir, lang='en-US', cover=None):
    """Build an EPUB with one page per section and zip it as ``article.epub``.

    title     -- book title
    authors   -- iterable of author names, each added as a creator
    sections  -- iterable of objects providing ``title``, ``paragraphs`` and
                 ``tocDepth`` (see ``Section``)
    outputDir -- directory that receives the unpacked book and the archive
    lang      -- language code stored in the book metadata
    cover     -- accepted for compatibility but currently ignored; no title
                 page, TOC page or cover is generated

    Returns the path of the archive, ``<outputDir>/article.epub``.
    """
    # Local import: this module does not import os at file level.
    import os

    book = epub.EpubBook()
    book.setLang(lang)
    book.setTitle(title)
    for author in authors:
        book.addCreator(author)

    loader = TemplateLoader('./epub/templates')
    tmpl = loader.load('ez-section.html')

    for number, section in enumerate(sections, 1):
        stream = tmpl.generate(section=section)
        html = stream.render('xhtml', doctype='xhtml11', drop_xml_decl=False)
        item = book.addHtml('', 's%d.html' % number, html)
        book.addSpineItem(item)
        book.addTocMapNode(item.destPath, section.title, section.tocDepth)

    # Joining (rather than concatenating) keeps the archive inside outputDir
    # whether or not the caller passed a trailing path separator.
    outputFile = os.path.join(outputDir, 'article.epub')
    book.createBook(outputDir)
    book.createArchive(outputDir, outputFile)
    return outputFile
diff --git a/epub/templates/container.xml b/epub/templates/container.xml
new file mode 100644
index 00000000..eecf7a0d
--- /dev/null
+++ b/epub/templates/container.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
+ <rootfiles>
+ <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
+ </rootfiles>
+</container>
diff --git a/epub/templates/content.opf b/epub/templates/content.opf
new file mode 100644
index 00000000..67f3f5c6
--- /dev/null
+++ b/epub/templates/content.opf
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8" standalone="no"?>
+<opf:package xmlns:opf="http://www.idpf.org/2007/opf"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:py="http://genshi.edgewall.org/"
+ unique-identifier="bookid" version="2.0">
+ <opf:metadata >
+ <dc:identifier id="bookid">urn:uuid:${book.UUID}</dc:identifier>
+ <dc:language>${book.lang}</dc:language>
+ <dc:title>${book.title}</dc:title>
+ <py:for each="name, role in book.creators">
+ <dc:creator opf:role="$role">$name</dc:creator>
+ </py:for>
+ <py:for each="beginTag, content, endTag in book.getMetaTags()">
+ ${Markup(beginTag)}$content${Markup(endTag)}
+ </py:for>
+ <opf:meta name="cover" content="${book.coverImage.id}" py:if="book.coverImage"/>
+ </opf:metadata>
+ <opf:manifest>
+ <opf:item id="ncxtoc" media-type="application/x-dtbncx+xml" href="toc.ncx"/>
+ <py:for each="item in book.getAllItems()">
+ <opf:item id="${item.id}" media-type="${item.mimeType}" href="${item.destPath}"/>
+ </py:for>
+ </opf:manifest>
+ <opf:spine toc="ncxtoc">
+ <py:for each="_, item, linear in book.getSpine()">
+ <opf:itemref idref="${item.id}" linear="${'yes' if linear else 'no'}"/>
+ </py:for>
+ </opf:spine>
+ <opf:guide py:if="book.guide">
+ <py:for each="href, title, type in book.getGuide()">
+ <opf:reference href="$href" type="$type" title="$title"/>
+ </py:for>
+ </opf:guide>
+</opf:package>
diff --git a/epub/templates/ez-section.html b/epub/templates/ez-section.html
new file mode 100644
index 00000000..0a715e7f
--- /dev/null
+++ b/epub/templates/ez-section.html
@@ -0,0 +1,17 @@
+<html xmlns="http://www.w3.org/1999/xhtml"
+ xmlns:py="http://genshi.edgewall.org/">
+<head>
+ <title>${section.title}</title>
+ <style type="text/css">
+h1 {
+ text-align: center;
+}
+ </style>
+</head>
+<body>
+ <h1>${section.title}</h1>
+ <py:for each="p in section.paragraphs">
+ <p>$p</p>
+ </py:for>
+</body>
+</html>
diff --git a/epub/templates/image.html b/epub/templates/image.html
new file mode 100644
index 00000000..9a838c7e
--- /dev/null
+++ b/epub/templates/image.html
@@ -0,0 +1,16 @@
+<html xmlns="http://www.w3.org/1999/xhtml"
+ xmlns:py="http://genshi.edgewall.org/">
+<head>
+ <title>${item.destPath}</title>
+ <style type="text/css">
+div, img {
+ border: 0;
+ margin: 0;
+ padding: 0;
+}
+ </style>
+</head>
+<body>
+ <div><img src="${item.destPath}" alt="${item.destPath}"/></div>
+</body>
+</html>
diff --git a/epub/templates/title-page.html b/epub/templates/title-page.html
new file mode 100644
index 00000000..de0f55f0
--- /dev/null
+++ b/epub/templates/title-page.html
@@ -0,0 +1,22 @@
+<html xmlns="http://www.w3.org/1999/xhtml"
+ xmlns:py="http://genshi.edgewall.org/">
+<head>
+ <title>${book.title}</title>
+ <style type="text/css">
+.title, .authors {
+ text-align: center;
+}
+span.author {
+ margin: 1em;
+}
+ </style>
+</head>
+<body>
+ <h1 class="title">${book.title}</h1>
+ <h3 class="authors">
+ <py:for each="creator, _ in book.creators">
+ <span class="author">$creator</span>
+ </py:for>
+ </h3>
+</body>
+</html>
diff --git a/epub/templates/toc.html b/epub/templates/toc.html
new file mode 100644
index 00000000..b14c9da3
--- /dev/null
+++ b/epub/templates/toc.html
@@ -0,0 +1,32 @@
+<html xmlns="http://www.w3.org/1999/xhtml"
+ xmlns:py="http://genshi.edgewall.org/">
+<head>
+ <title>${book.title}</title>
+ <style type="text/css">
+.tocEntry-1 {
+}
+.tocEntry-2 {
+ text-indent: 1em;
+}
+.tocEntry-3 {
+ text-indent: 2em;
+}
+.tocEntry-4 {
+ text-indent: 3em;
+}
+ </style>
+</head>
+<body>
+ <py:def function="tocEntry(node)">
+ <div class="tocEntry-${node.depth}">
+ <a href="${node.href}">${node.title}</a>
+ </div>
+ <py:for each="child in node.children">
+ ${tocEntry(child)}
+ </py:for>
+ </py:def>
+ <py:for each="child in book.getTocMapRoot().children">
+ ${tocEntry(child)}
+ </py:for>
+</body>
+</html>
diff --git a/epub/templates/toc.ncx b/epub/templates/toc.ncx
new file mode 100644
index 00000000..e7dd391a
--- /dev/null
+++ b/epub/templates/toc.ncx
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/"
+ xmlns:py="http://genshi.edgewall.org/"
+ version="2005-1">
+ <head>
+ <meta name="dtb:uid" content="urn:uuid:${book.UUID}"/>
+ <meta name="dtb:depth" content="${book.getTocMapHeight()}"/>
+ <meta name="dtb:totalPageCount" content="0"/>
+ <meta name="dtb:maxPageNumber" content="0"/>
+ </head>
+ <docTitle>
+ <text>${book.title}</text>
+ </docTitle>
+ <navMap>
+ <py:def function="navPoint(node)">
+ <navPoint id="navPoint-${node.playOrder}" playOrder="${node.playOrder}">
+ <navLabel><text>${node.title}</text></navLabel>
+ <content src="${node.href}"/>
+ <py:for each="child in node.children">
+ ${navPoint(child)}
+ </py:for>
+ </navPoint>
+ </py:def>
+ <py:for each="child in book.getTocMapRoot().children">
+ ${navPoint(child)}
+ </py:for>
+ </navMap>
+</ncx>
bgstack15