diff options
author | cedricbonhomme <devnull@localhost> | 2012-04-15 18:59:50 +0200 |
---|---|---|
committer | cedricbonhomme <devnull@localhost> | 2012-04-15 18:59:50 +0200 |
commit | e6472738b5253aa328f8b2a4f4f2a23abc8582c2 (patch) | |
tree | c61704deed1d3cb37f5e3961794896c6dd115ba5 /source/epub | |
parent | Better use of datetime. (diff) | |
download | newspipe-e6472738b5253aa328f8b2a4f4f2a23abc8582c2.tar.gz newspipe-e6472738b5253aa328f8b2a4f4f2a23abc8582c2.tar.bz2 newspipe-e6472738b5253aa328f8b2a4f4f2a23abc8582c2.zip |
Reorganization of folders.
Diffstat (limited to 'source/epub')
-rw-r--r-- | source/epub/__init__.py | 1 | ||||
-rw-r--r-- | source/epub/epub.py | 343 | ||||
-rw-r--r-- | source/epub/ez_epub.py | 36 | ||||
-rw-r--r-- | source/epub/templates/container.xml | 6 | ||||
-rw-r--r-- | source/epub/templates/content.opf | 34 | ||||
-rw-r--r-- | source/epub/templates/ez-section.html | 17 | ||||
-rw-r--r-- | source/epub/templates/image.html | 16 | ||||
-rw-r--r-- | source/epub/templates/title-page.html | 22 | ||||
-rw-r--r-- | source/epub/templates/toc.html | 32 | ||||
-rw-r--r-- | source/epub/templates/toc.ncx | 28 |
10 files changed, 535 insertions, 0 deletions
diff --git a/source/epub/__init__.py b/source/epub/__init__.py new file mode 100644 index 00000000..8d1c8b69 --- /dev/null +++ b/source/epub/__init__.py @@ -0,0 +1 @@ + diff --git a/source/epub/epub.py b/source/epub/epub.py new file mode 100644 index 00000000..2c01b54a --- /dev/null +++ b/source/epub/epub.py @@ -0,0 +1,343 @@ +#! /usr/local/bin/python
+#-*- coding: utf-8 -*-
+
+import itertools
+import mimetypes
+import os
+import shutil
+import subprocess
+import uuid
+import zipfile
+from genshi.template import TemplateLoader
+from lxml import etree
+
class TocMapNode(object):
    """One entry of the table-of-contents tree.

    A node stores the link target (``href``), the label (``title``), its
    child entries and its nesting ``depth``.  ``playOrder`` is the node's
    position in a pre-order walk of the tree and is filled in by
    :meth:`assignPlayOrder`.
    """

    def __init__(self, title='', href=''):
        """Create a leaf node; all arguments are optional.

        title -- label of the entry.
        href  -- target of the entry.
        """
        self.playOrder = 0
        self.title = title
        self.href = href
        self.children = []
        self.depth = 0

    def assignPlayOrder(self):
        """Number this node and all of its descendants in pre-order.

        The node the method is called on receives 0, its first child 1,
        and so on, children being visited in list order.
        """
        # An explicit stack is used instead of recursion so that very deep
        # trees cannot exceed the interpreter's recursion limit.
        nextPlayOrder = 0
        pending = [self]
        while pending:
            node = pending.pop()
            node.playOrder = nextPlayOrder
            nextPlayOrder += 1
            # Reversed so the first child is popped (and numbered) first.
            pending.extend(reversed(node.children))
+
class EpubItem(object):
    """A single file of the book (HTML page, image or stylesheet).

    id       -- identifier of the item.
    srcPath  -- path of the source file on disk ('' for generated content).
    destPath -- path of the item inside the book.
    mimeType -- media type of the item.
    html     -- in-memory content for HTML items ('' when there is none).
    """

    def __init__(self, id='', srcPath='', destPath='', mimeType='', html=''):
        """Create an item; every field is optional and defaults to ''."""
        self.id = id
        self.srcPath = srcPath
        self.destPath = destPath
        self.mimeType = mimeType
        self.html = html

    def __repr__(self):
        return '%s(id=%r, destPath=%r, mimeType=%r)' % (
            type(self).__name__, self.id, self.destPath, self.mimeType)
+
class EpubBook(object):
    """In-memory description of an EPUB book and the code that writes it.

    Typical use: set the metadata, register content with the ``add*``
    methods, call :meth:`createBook` to write the unpacked book into a
    directory, then :meth:`createArchive` to zip that directory.
    """

    def __init__(self):
        # The historical, working-directory-relative location is tried
        # first; the directory next to this module is a fallback so the
        # class also works when started from another working directory.
        moduleDir = os.path.dirname(os.path.abspath(__file__))
        self.loader = TemplateLoader(['./epub/templates',
                                      os.path.join(moduleDir, 'templates')])

        self.rootDir = ''
        self.UUID = uuid.uuid1()

        self.lang = 'en-US'
        self.title = ''
        self.creators = []      # list of (name, role)
        self.metaInfo = []      # list of (name, value, attributes dict)

        # Registered items, keyed by their destination path.
        self.imageItems = {}
        self.htmlItems = {}
        self.cssItems = {}

        self.coverImage = None
        self.titlePage = None
        self.tocPage = None

        self.spine = []         # list of (order, item, linear)
        self.guide = {}         # type -> (href, title, type)
        self.tocMapRoot = TocMapNode()
        self.lastNodeAtDepth = {0: self.tocMapRoot}

    def setTitle(self, title):
        """Set the title of the book."""
        self.title = title

    def setLang(self, lang):
        """Set the language code of the book (default 'en-US')."""
        self.lang = lang

    def addCreator(self, name, role='aut'):
        """Register a creator of the book together with its role code."""
        self.creators.append((name, role))

    def addMeta(self, metaName, metaValue, **metaAttrs):
        """Register an extra ``dc:`` metadata element.

        Keyword arguments become ``opf:`` attributes of the element.
        """
        self.metaInfo.append((metaName, metaValue, metaAttrs))

    def getMetaTags(self):
        """Return a list of ``(beginTag, value, endTag)`` per metadata entry.

        ``beginTag`` and ``endTag`` are ready-made markup strings.  Attribute
        values are escaped and quoted so that characters such as ``&`` or
        ``"`` cannot produce malformed markup; ``value`` is returned as is.
        """
        # Imported here because the module does not import it at file level.
        from xml.sax.saxutils import quoteattr

        tags = []
        for metaName, metaValue, metaAttrs in self.metaInfo:
            attrText = ''.join(
                # '%s' % (...) coerces non-string values before quoting.
                ' opf:%s=%s' % (attrName, quoteattr('%s' % (attrValue,)))
                for attrName, attrValue in metaAttrs.items())
            tags.append(('<dc:%s%s>' % (metaName, attrText),
                         metaValue,
                         '</dc:%s>' % metaName))
        return tags

    def getImageItems(self):
        """Return the image items sorted by id."""
        return sorted(self.imageItems.values(), key=lambda item: item.id)

    def getHtmlItems(self):
        """Return the HTML items sorted by id."""
        return sorted(self.htmlItems.values(), key=lambda item: item.id)

    def getCssItems(self):
        """Return the CSS items sorted by id."""
        return sorted(self.cssItems.values(), key=lambda item: item.id)

    def getAllItems(self):
        """Return every registered item (images, HTML, CSS) sorted by id."""
        everything = itertools.chain(self.imageItems.values(),
                                     self.htmlItems.values(),
                                     self.cssItems.values())
        return sorted(everything, key=lambda item: item.id)

    def addImage(self, srcPath, destPath):
        """Register the image file ``srcPath`` under ``destPath``.

        The media type is guessed from ``destPath``.  Returns the new item.
        """
        item = EpubItem()
        item.id = 'image_%d' % (len(self.imageItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = mimetypes.guess_type(destPath)[0]
        assert item.destPath not in self.imageItems, 'duplicate image path'
        self.imageItems[destPath] = item
        return item

    def addHtmlForImage(self, imageItem):
        """Create and register an HTML page that displays ``imageItem``."""
        tmpl = self.loader.load('image.html')
        stream = tmpl.generate(book=self, item=imageItem)
        html = stream.render('xhtml', doctype='xhtml11', drop_xml_decl=False)
        return self.addHtml('', '%s.html' % imageItem.destPath, html)

    def addHtml(self, srcPath, destPath, html):
        """Register an HTML page and return the new item.

        srcPath  -- source file, copied when ``html`` is empty.
        destPath -- path of the page inside the book; must be unique.
        html     -- content of the page.
        """
        item = EpubItem()
        item.id = 'html_%d' % (len(self.htmlItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.html = html
        item.mimeType = 'application/xhtml+xml'
        assert item.destPath not in self.htmlItems, 'duplicate HTML path'
        self.htmlItems[item.destPath] = item
        return item

    def addCss(self, srcPath, destPath):
        """Register the stylesheet ``srcPath`` under ``destPath``."""
        item = EpubItem()
        item.id = 'css_%d' % (len(self.cssItems) + 1)
        item.srcPath = srcPath
        item.destPath = destPath
        item.mimeType = 'text/css'
        assert item.destPath not in self.cssItems, 'duplicate CSS path'
        self.cssItems[item.destPath] = item
        return item

    def addCover(self, srcPath):
        """Register ``srcPath`` as the cover image (allowed only once).

        The image is stored as ``cover<ext>``.  No cover page is added to
        the spine or the guide.
        """
        assert not self.coverImage, 'cover already set'
        _, ext = os.path.splitext(srcPath)
        self.coverImage = self.addImage(srcPath, 'cover%s' % ext)

    def __makeTitlePage(self):
        """Render the title page unless custom HTML was supplied."""
        assert self.titlePage
        if self.titlePage.html:
            return
        tmpl = self.loader.load('title-page.html')
        stream = tmpl.generate(book=self)
        self.titlePage.html = stream.render(
            'xhtml', doctype='xhtml11', drop_xml_decl=False)

    def addTitlePage(self, html=''):
        """Add a title page (allowed only once).

        When ``html`` is empty the page is rendered from the
        ``title-page.html`` template by :meth:`createBook`.
        """
        assert not self.titlePage, 'title page already added'
        self.titlePage = self.addHtml('', 'title-page.html', html)
        self.addSpineItem(self.titlePage, True, -200)
        self.addGuideItem('title-page.html', 'Title Page', 'title-page')

    def __makeTocPage(self):
        """Render the table-of-contents page from ``toc.html``."""
        assert self.tocPage
        tmpl = self.loader.load('toc.html')
        stream = tmpl.generate(book=self)
        self.tocPage.html = stream.render(
            'xhtml', doctype='xhtml11', drop_xml_decl=False)

    def addTocPage(self):
        """Add a table-of-contents page (allowed only once).

        The page content is rendered by :meth:`createBook`.
        """
        assert not self.tocPage, 'TOC page already added'
        self.tocPage = self.addHtml('', 'toc.html', '')
        self.addSpineItem(self.tocPage, False, -100)
        self.addGuideItem('toc.html', 'Table of Contents', 'toc')

    def getSpine(self):
        """Return the ``(order, item, linear)`` entries sorted by order.

        Entries with the same order keep their insertion order.
        """
        # Sort on the order only: comparing whole tuples would fall back to
        # comparing the item objects whenever two orders are equal.
        return sorted(self.spine, key=lambda entry: entry[0])

    def addSpineItem(self, item, linear=True, order=None):
        """Append a registered HTML item to the reading order.

        linear -- stored with the entry.
        order  -- sort key; by default one more than the largest order so
                  far (or 1 for the first entry).
        """
        assert item.destPath in self.htmlItems, 'item is not a registered page'
        if order is None:
            lastOrder = max(o for o, _, _ in self.spine) if self.spine else 0
            order = lastOrder + 1
        self.spine.append((order, item, linear))

    def getGuide(self):
        """Return the ``(href, title, type)`` guide entries sorted by type."""
        return sorted(self.guide.values(), key=lambda entry: entry[2])

    def addGuideItem(self, href, title, type):
        """Register a guide reference; each ``type`` may be used once."""
        assert type not in self.guide, 'duplicate guide type'
        self.guide[type] = (href, title, type)

    def getTocMapRoot(self):
        """Return the root node of the table-of-contents tree."""
        return self.tocMapRoot

    def getTocMapHeight(self):
        """Return the largest depth at which a TOC node was ever added."""
        return max(self.lastNodeAtDepth)

    def addTocMapNode(self, href, title, depth=None, parent=None):
        """Add an entry to the table-of-contents tree and return it.

        parent -- node to attach to.  When omitted, the entry is attached
                  to the most recent node at ``depth - 1``, or to the root
                  when ``depth`` is omitted as well.

        Raises KeyError when no node exists yet at ``depth - 1``.
        """
        node = TocMapNode()
        node.href = href
        node.title = title
        if parent is None:
            if depth is None:
                parent = self.tocMapRoot
            else:
                parent = self.lastNodeAtDepth[depth - 1]
        parent.children.append(node)
        node.depth = parent.depth + 1
        self.lastNodeAtDepth[node.depth] = node
        return node

    @staticmethod
    def __ensureDir(path):
        """Create ``path`` (and parents) unless it already is a directory."""
        try:
            os.makedirs(path)
        except OSError:
            # Only "already exists" is harmless; anything else (permission
            # denied, a file in the way, ...) must reach the caller.
            if not os.path.isdir(path):
                raise

    @staticmethod
    def __writeFile(path, data):
        """Write ``data`` to ``path``, encoding text as UTF-8.

        Byte strings are written unchanged.  Binary mode keeps the result
        independent of the platform's default encoding.
        """
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with open(path, 'wb') as fout:
            fout.write(data)

    def makeDirs(self):
        """Create the ``META-INF`` and ``OEBPS`` directories in ``rootDir``.

        Existing directories are accepted; other failures raise OSError.
        """
        for name in ('META-INF', 'OEBPS'):
            self.__ensureDir(os.path.join(self.rootDir, name))

    def __writeContainerXML(self):
        """Render ``META-INF/container.xml``."""
        tmpl = self.loader.load('container.xml')
        stream = tmpl.generate()
        self.__writeFile(
            os.path.join(self.rootDir, 'META-INF', 'container.xml'),
            stream.render('xml'))

    def __writeTocNCX(self):
        """Number the TOC nodes and render ``OEBPS/toc.ncx``."""
        self.tocMapRoot.assignPlayOrder()
        tmpl = self.loader.load('toc.ncx')
        stream = tmpl.generate(book=self)
        self.__writeFile(os.path.join(self.rootDir, 'OEBPS', 'toc.ncx'),
                         stream.render('xml'))

    def __writeContentOPF(self):
        """Render ``OEBPS/content.opf``."""
        tmpl = self.loader.load('content.opf')
        stream = tmpl.generate(book=self)
        self.__writeFile(os.path.join(self.rootDir, 'OEBPS', 'content.opf'),
                         stream.render('xml'))

    def __writeItems(self):
        """Write every registered item below ``OEBPS``.

        Items with in-memory ``html`` are written from memory; all other
        items are copied from their ``srcPath``.
        """
        for item in self.getAllItems():
            target = os.path.join(self.rootDir, 'OEBPS', item.destPath)
            # destPath may contain sub-directories (e.g. 'images/x.png').
            self.__ensureDir(os.path.dirname(target))
            if item.html:
                self.__writeFile(target, item.html)
            else:
                shutil.copyfile(item.srcPath, target)

    def __writeMimeType(self):
        """Write the ``mimetype`` file."""
        self.__writeFile(os.path.join(self.rootDir, 'mimetype'),
                         'application/epub+zip')

    @staticmethod
    def __listManifestItems(contentOPFPath):
        """Return the ``href`` of every manifest item of a content.opf."""
        tree = etree.parse(contentOPFPath)
        return tree.xpath("//opf:manifest/opf:item/@href",
                          namespaces={'opf': 'http://www.idpf.org/2007/opf'})

    @staticmethod
    def createArchive(rootDir, outputPath):
        """Zip the unpacked book in ``rootDir`` into ``outputPath``.

        ``mimetype`` is added first and uncompressed; ``container.xml``,
        ``content.opf`` and every file listed in the manifest follow,
        deflated.  The process working directory is not changed.
        """
        contentOPF = os.path.join('OEBPS', 'content.opf')
        members = [os.path.join('META-INF', 'container.xml'), contentOPF]
        # Read the manifest before opening the archive so a broken
        # content.opf does not leave a partial file behind.
        manifest = EpubBook.__listManifestItems(
            os.path.join(rootDir, contentOPF))
        members.extend(os.path.join('OEBPS', href) for href in manifest)

        with zipfile.ZipFile(outputPath, 'w') as archive:
            archive.write(os.path.join(rootDir, 'mimetype'), 'mimetype',
                          zipfile.ZIP_STORED)
            for member in members:
                archive.write(os.path.join(rootDir, member), member,
                              zipfile.ZIP_DEFLATED)

    def createBook(self, rootDir):
        """Write the unpacked book into the directory ``rootDir``.

        Renders the title and TOC pages if they were added, creates the
        directory layout and writes all items and package files.
        """
        if self.titlePage:
            self.__makeTitlePage()
        if self.tocPage:
            self.__makeTocPage()
        self.rootDir = rootDir
        self.makeDirs()
        self.__writeMimeType()
        self.__writeItems()
        self.__writeContainerXML()
        self.__writeContentOPF()
        self.__writeTocNCX()
+
def test(rootDir=r'd:\epub\test', coverPath=r'D:\epub\blank.png',
         cssPath=r'main.css'):
    """Build a small sample book and zip it to ``rootDir + '.epub'``.

    rootDir   -- directory that receives the unpacked book.
    coverPath -- existing image file used as the cover.
    cssPath   -- existing stylesheet copied into the book as 'main.css'.

    The defaults are the original author's local Windows paths; pass your
    own locations to run the sample elsewhere.
    """
    def getMinimalHtml(text):
        return """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>%s</title></head>
<body><p>%s</p></body>
</html>
""" % (text, text)

    book = EpubBook()
    book.setTitle('Most Wanted Tips for Aspiring Young Pirates')
    book.addCreator('Monkey D Luffy')
    book.addCreator('Guybrush Threepwood')
    book.addMeta('contributor', 'Smalltalk80', role='bkp')
    book.addMeta('date', '2010', event='publication')

    book.addTitlePage()
    book.addTocPage()
    book.addCover(coverPath)

    book.addCss(cssPath, 'main.css')

    n1 = book.addHtml('', '1.html', getMinimalHtml('Chapter 1'))
    n11 = book.addHtml('', '2.html', getMinimalHtml('Section 1.1'))
    n111 = book.addHtml('', '3.html', getMinimalHtml('Subsection 1.1.1'))
    n12 = book.addHtml('', '4.html', getMinimalHtml('Section 1.2'))
    n2 = book.addHtml('', '5.html', getMinimalHtml('Chapter 2'))

    for page in (n1, n11, n111, n12, n2):
        book.addSpineItem(page)

    # The TOC map can be built either with explicit parents:
    #   t1 = book.addTocMapNode(n1.destPath, '1')
    #   t11 = book.addTocMapNode(n11.destPath, '1.1', parent = t1)
    #   t111 = book.addTocMapNode(n111.destPath, '1.1.1', parent = t11)
    #   t12 = book.addTocMapNode(n12.destPath, '1.2', parent = t1)
    #   t2 = book.addTocMapNode(n2.destPath, '2')
    # or, as done here, with depths:
    book.addTocMapNode(n1.destPath, '1')
    book.addTocMapNode(n11.destPath, '1.1', 2)
    book.addTocMapNode(n111.destPath, '1.1.1', 3)
    book.addTocMapNode(n12.destPath, '1.2', 2)
    book.addTocMapNode(n2.destPath, '2')

    book.createBook(rootDir)
    EpubBook.createArchive(rootDir, rootDir + '.epub')


if __name__ == '__main__':
    test()
\ No newline at end of file diff --git a/source/epub/ez_epub.py b/source/epub/ez_epub.py new file mode 100644 index 00000000..ecfd4f5a --- /dev/null +++ b/source/epub/ez_epub.py @@ -0,0 +1,36 @@ +#! /usr/local/bin/python
+#-*- coding: utf-8 -*-
+
+import epub
+from genshi.template import TemplateLoader
+
class Section(object):
    """One section of a simple book.

    title      -- heading of the section.
    paragraphs -- list of paragraphs of the section.
    tocDepth   -- depth of the section in the table of contents.
    """

    def __init__(self, title='', paragraphs=None, tocDepth=1):
        """Create a section; all arguments are optional.

        ``paragraphs`` defaults to a new empty list for each instance.
        """
        self.title = title
        self.paragraphs = [] if paragraphs is None else paragraphs
        self.tocDepth = tocDepth
+
def makeBook(title, authors, sections, outputDir, lang='en-US', cover=None):
    """Build ``article.epub`` inside ``outputDir`` from ``sections``.

    title     -- title of the book.
    authors   -- iterable of creator names.
    sections  -- iterable of objects with ``title``, ``paragraphs`` and
                 ``tocDepth`` attributes; each becomes one page
                 (``s1.html``, ``s2.html``, ...).
    outputDir -- directory that receives the unpacked book and the
                 archive; a trailing path separator is optional.
    lang      -- language code of the book.
    cover     -- accepted for compatibility; currently not used.

    Returns the path of the generated archive.
    """
    # Imported here because this module does not import it at file level.
    import os.path

    book = epub.EpubBook()
    book.setLang(lang)
    book.setTitle(title)
    for author in authors:
        book.addCreator(author)
    # Title page, TOC page and cover are deliberately not generated:
    #   book.addTitlePage()
    #   book.addTocPage()
    #   if cover: book.addCover(cover)

    # Reuse the book's loader so both look templates up in the same place.
    tmpl = book.loader.load('ez-section.html')

    for i, section in enumerate(sections):
        stream = tmpl.generate(section=section)
        html = stream.render('xhtml', doctype='xhtml11', drop_xml_decl=False)
        item = book.addHtml('', 's%d.html' % (i + 1), html)
        book.addSpineItem(item)
        book.addTocMapNode(item.destPath, section.title, section.tocDepth)

    # join() gives the same path as plain concatenation when outputDir ends
    # with a separator, and a correct one when it does not.
    outputFile = os.path.join(outputDir, 'article.epub')
    book.createBook(outputDir)
    book.createArchive(outputDir, outputFile)
    return outputFile
\ No newline at end of file diff --git a/source/epub/templates/container.xml b/source/epub/templates/container.xml new file mode 100644 index 00000000..eecf7a0d --- /dev/null +++ b/source/epub/templates/container.xml @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0"> + <rootfiles> + <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/> + </rootfiles> +</container> diff --git a/source/epub/templates/content.opf b/source/epub/templates/content.opf new file mode 100644 index 00000000..67f3f5c6 --- /dev/null +++ b/source/epub/templates/content.opf @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="utf-8" standalone="no"?> +<opf:package xmlns:opf="http://www.idpf.org/2007/opf" + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:py="http://genshi.edgewall.org/" + unique-identifier="bookid" version="2.0"> + <opf:metadata > + <dc:identifier id="bookid">urn:uuid:${book.UUID}</dc:identifier> + <dc:language>${book.lang}</dc:language> + <dc:title>${book.title}</dc:title> + <py:for each="name, role in book.creators"> + <dc:creator opf:role="$role">$name</dc:creator> + </py:for> + <py:for each="beginTag, content, endTag in book.getMetaTags()"> + ${Markup(beginTag)}$content${Markup(endTag)} + </py:for> + <opf:meta name="cover" content="${book.coverImage.id}" py:if="book.coverImage"/> + </opf:metadata> + <opf:manifest> + <opf:item id="ncxtoc" media-type="application/x-dtbncx+xml" href="toc.ncx"/> + <py:for each="item in book.getAllItems()"> + <opf:item id="${item.id}" media-type="${item.mimeType}" href="${item.destPath}"/> + </py:for> + </opf:manifest> + <opf:spine toc="ncxtoc"> + <py:for each="_, item, linear in book.getSpine()"> + <opf:itemref idref="${item.id}" linear="${'yes' if linear else 'no'}"/> + </py:for> + </opf:spine> + <opf:guide py:if="book.guide"> + <py:for each="href, title, type in book.getGuide()"> + <opf:reference href="$href" 
type="$type" title="$title"/> + </py:for> + </opf:guide> +</opf:package> diff --git a/source/epub/templates/ez-section.html b/source/epub/templates/ez-section.html new file mode 100644 index 00000000..0a715e7f --- /dev/null +++ b/source/epub/templates/ez-section.html @@ -0,0 +1,17 @@ +<html xmlns="http://www.w3.org/1999/xhtml" + xmlns:py="http://genshi.edgewall.org/"> +<head> + <title>${section.title}</title> + <style type="text/css"> +h1 { + text-align: center; +} + </style> +</head> +<body> + <h1>${section.title}</h1> + <py:for each="p in section.paragraphs"> + <p>$p</p> + </py:for> +</body> +</html> diff --git a/source/epub/templates/image.html b/source/epub/templates/image.html new file mode 100644 index 00000000..9a838c7e --- /dev/null +++ b/source/epub/templates/image.html @@ -0,0 +1,16 @@ +<html xmlns="http://www.w3.org/1999/xhtml" + xmlns:py="http://genshi.edgewall.org/"> +<head> + <title>${item.destPath}</title> + <style type="text/css"> +div, img { + border: 0; + margin: 0; + padding: 0; +} + </style> +</head> +<body> + <div><img src="${item.destPath}" alt="${item.destPath}"/></div> +</body> +</html> diff --git a/source/epub/templates/title-page.html b/source/epub/templates/title-page.html new file mode 100644 index 00000000..de0f55f0 --- /dev/null +++ b/source/epub/templates/title-page.html @@ -0,0 +1,22 @@ +<html xmlns="http://www.w3.org/1999/xhtml" + xmlns:py="http://genshi.edgewall.org/"> +<head> + <title>${book.title}</title> + <style type="text/css"> +.title, .authors { + text-align: center; +} +span.author { + margin: 1em; +} + </style> +</head> +<body> + <h1 class="title">${book.title}</h1> + <h3 class="authors"> + <py:for each="creator, _ in book.creators"> + <span class="author">$creator</span> + </py:for> + </h3> +</body> +</html> diff --git a/source/epub/templates/toc.html b/source/epub/templates/toc.html new file mode 100644 index 00000000..b14c9da3 --- /dev/null +++ b/source/epub/templates/toc.html @@ -0,0 +1,32 @@ +<html 
xmlns="http://www.w3.org/1999/xhtml" + xmlns:py="http://genshi.edgewall.org/"> +<head> + <title>${book.title}</title> + <style type="text/css"> +.tocEntry-1 { +} +.tocEntry-2 { + text-indent: 1em; +} +.tocEntry-3 { + text-indent: 2em; +} +.tocEntry-4 { + text-indent: 3em; +} + </style> +</head> +<body> + <py:def function="tocEntry(node)"> + <div class="tocEntry-${node.depth}"> + <a href="${node.href}">${node.title}</a> + </div> + <py:for each="child in node.children"> + ${tocEntry(child)} + </py:for> + </py:def> + <py:for each="child in book.getTocMapRoot().children"> + ${tocEntry(child)} + </py:for> +</body> +</html> diff --git a/source/epub/templates/toc.ncx b/source/epub/templates/toc.ncx new file mode 100644 index 00000000..e7dd391a --- /dev/null +++ b/source/epub/templates/toc.ncx @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" + xmlns:py="http://genshi.edgewall.org/" + version="2005-1"> + <head> + <meta name="dtb:uid" content="urn:uuid:${book.UUID}"/> + <meta name="dtb:depth" content="${book.getTocMapHeight()}"/> + <meta name="dtb:totalPageCount" content="0"/> + <meta name="dtb:maxPageNumber" content="0"/> + </head> + <docTitle> + <text>${book.title}</text> + </docTitle> + <navMap> + <py:def function="navPoint(node)"> + <navPoint id="navPoint-${node.playOrder}" playOrder="${node.playOrder}"> + <navLabel><text>${node.title}</text></navLabel> + <content src="${node.href}"/> + <py:for each="child in node.children"> + ${navPoint(child)} + </py:for> + </navPoint> + </py:def> + <py:for each="child in book.getTocMapRoot().children"> + ${navPoint(child)} + </py:for> + </navMap> +</ncx> |