112 lines
3.9 KiB
Python
112 lines
3.9 KiB
Python
|
#!/usr/bin/env python
|
||
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
import zipfile
|
||
|
from lxml import etree
|
||
|
import sys
|
||
|
import os
|
||
|
from operator import itemgetter
|
||
|
from re import split
|
||
|
import multiprocessing, subprocess
|
||
|
|
||
|
# Directory that receives the numbered intermediate wav files.
tempdir = "/dev/shm"
# Directory the script was started from; silence.wav is referenced relative
# to it when the task list is built.
cwd = os.getcwd()

# Prefix -> namespace URI table passed to every XPath query in this script.
ns = {
    'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
    'pkg': 'http://www.idpf.org/2007/opf',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'ncx': 'http://www.daisy.org/z3986/2005/ncx/',
    'xhtml': 'http://www.w3.org/1999/xhtml',
}
|
||
|
|
||
|
# The script takes the epub path and, optionally, an issue number; any other
# argument count is a usage error.
if len(sys.argv) not in (2, 3):
    # The original message carried a bare "%d" that was never interpolated.
    # Show the script name instead, report on stderr, and use sys.exit()
    # rather than the site-provided exit() helper.
    sys.stderr.write("usage: %s epub [number]\n" % sys.argv[0])
    sys.exit(1)
|
||
|
|
||
|
# An epub is a zip archive; META-INF/container.xml names the OPF package
# document through the rootfile's full-path attribute.
fzip = zipfile.ZipFile(sys.argv[1])
txt = fzip.read('META-INF/container.xml')
tree = etree.fromstring(txt)
cfname = tree.xpath('n:rootfiles/n:rootfile/@full-path', namespaces=ns)[0]

# Manifest hrefs are joined onto the directory that holds the OPF file.
pdir = os.path.dirname(cfname)

cf = fzip.read(cfname)
tree = etree.fromstring(cf)

# Resolve the NCX table of contents through the spine's "toc" attribute,
# which holds the manifest id of the NCX item.  Fall back to the literal id
# "ncx" (the only lookup the original performed) when the spine has no usable
# toc reference.
ncxhrefs = tree.xpath(
    '/pkg:package/pkg:manifest/pkg:item[@id=/pkg:package/pkg:spine/@toc]/@href',
    namespaces=ns)
if not ncxhrefs:
    ncxhrefs = tree.xpath(
        '/pkg:package/pkg:manifest/pkg:item[@id="ncx"]/@href',
        namespaces=ns)
if not ncxhrefs:
    # sys.exit() with a string prints it on stderr and exits with status 1.
    sys.exit("%s: no NCX table of contents declared in %s"
             % (sys.argv[1], cfname))
ncxname = ncxhrefs[0]

# From here on `tree` is the parsed NCX document.
ncx = fzip.read(os.path.join(pdir, ncxname))
tree = etree.fromstring(ncx)
|
||
|
if len(sys.argv) == 2:
    # No issue number given: take every top-level navPoint of the NCX.
    navpoints = tree.xpath('/ncx:ncx/ncx:navMap/ncx:navPoint', namespaces=ns)
    # `title` names the .ogg output file at the end of the script.  The
    # original never set it on this path, so a one-argument run could not
    # complete; name the output after the epub file instead.
    title = os.path.splitext(os.path.basename(sys.argv[1]))[0]
else:
    # The number is handed to XPath as the variable $prefix instead of being
    # spliced into the expression text, so a quote character in the argument
    # cannot break the query.
    prefix = "Nr. %s" % sys.argv[2]

    # the following xpath expression finds the subtree for the book we want to print
    navpoints = tree.xpath(
        "/ncx:ncx/ncx:navMap/ncx:navPoint"
        "[starts-with(ncx:navLabel/ncx:text, $prefix)]/ncx:navPoint",
        namespaces=ns, prefix=prefix)

    # The label text of the matching top-level entry becomes the title.
    titles = tree.xpath(
        "/ncx:ncx/ncx:navMap/ncx:navPoint/ncx:navLabel"
        "/ncx:text[starts-with(., $prefix)]/text()",
        namespaces=ns, prefix=prefix)
    if not titles:
        # Replaces the bare IndexError the original raised on an empty match.
        sys.exit("no table-of-contents entry starts with '%s'" % prefix)
    title = titles[0]
|
||
|
|
||
|
# navPoints carrying one of these labels are left out of the page list.
skipped_labels = ('Cover', 'PERRY RHODAN - die Serie', 'Impressum')

# (playOrder, content src) pairs of the navPoints that are kept.
dnavpoint = []
for entry in navpoints:
    heading = entry.xpath('ncx:navLabel/ncx:text/text()', namespaces=ns)[0]
    if heading in skipped_labels:
        continue
    # playOrder is converted to int so the later sort is numeric.
    play_order = int(entry.xpath('@playOrder', namespaces=ns)[0])
    source = entry.xpath('ncx:content/@src', namespaces=ns)[0]
    dnavpoint.append((play_order, source))
|
||
|
|
||
|
# Build the list of external commands (one argv list per output wav file).
#
# NOTE(review): the indentation of this section was lost in the copy this
# was recovered from.  The nesting below is the most plausible reading; the
# level of the two trailing silence entries (after each paragraph / after
# each page) in particular is an assumption -- confirm against the original.

# to be able to work offline, this needs the w3c-sgml-lib package
parser = etree.XMLParser(load_dtd=True)
# Running index used to number the wav files: every appended task takes one.
i = 0
tasks = []
# Visit the pages in playOrder order.
for _, pagename in sorted(dnavpoint, key=itemgetter(0)):
    page = fzip.read(os.path.join(pdir, pagename))
    tree = etree.fromstring(page, parser)
    paragraphs = tree.xpath('/xhtml:html/xhtml:body/xhtml:p', namespaces=ns)
    for p in paragraphs:
        # Split the paragraph text at every \xbb...\xab span; the capturing
        # group keeps those spans in the result as their own pieces.
        p = split(r"(\xbb.+?\xab)", p.xpath('string()'))
        for s in p:
            # Trim commas, spaces and full stops from both ends.
            s = s.strip(', .')
            if s == '':
                continue
            # A piece that is only a non-breaking space becomes a link to
            # silence.wav instead of a synthesized file.
            if s == u'\xa0':
                tasks.append(["ln", "-s", "%s/silence.wav"%cwd, "%s/%04d.wav"%(tempdir,i)])
                i+=1
                continue

            # Pieces wrapped in \xbb...\xab are rendered with argument "3",
            # everything else with "2" (presumably a voice selector for
            # sapi2wav.exe -- TODO confirm).
            if s.startswith(u'\xbb') and s.endswith(u'\xab'):
                tasks.append(["wine", "sapi2wav.exe", "%s/%04d.wav"%(tempdir, i), "3", "-t", s])
            else:
                tasks.append(["wine", "sapi2wav.exe", "%s/%04d.wav"%(tempdir, i), "2", "-t", s])
            i+=1
        # One silence entry after the paragraph (assumed nesting, see above).
        tasks.append(["ln", "-s", "%s/silence.wav"%cwd, "%s/%04d.wav"%(tempdir,i)])
        i+=1
    # One more silence entry after the page (assumed nesting, see above).
    tasks.append(["ln", "-s", "%s/silence.wav"%cwd, "%s/%04d.wav"%(tempdir,i)])
    i+=1
|
||
|
|
||
|
# Paths of all numbered wav files, in index order; `i` is the number of
# slots handed out while the task list was built.
wavs = ["%s/%04d.wav" % (tempdir, j) for j in range(i)]

# Remove leftovers from an earlier run before anything is written.
# lexists() is used instead of exists(): the silence entries are symlinks,
# and exists() reports False for a dangling link, which would then be left in
# place and make the later "ln -s" for the same name fail.
for wav in wavs:
    if os.path.lexists(wav):
        os.unlink(wav)
|
||
|
|
||
|
def worker(cmd):
    """Run one external command with its output discarded.

    cmd is an argument list; it is executed directly, without a shell.
    Both stdout and stderr of the command are redirected to os.devnull.

    Returns the command's exit status, so a caller can tell failed commands
    apart; callers that ignore the result behave exactly as before.
    """
    with open(os.devnull, "w") as fnull:
        return subprocess.call(cmd, shell=False, stdout=fnull, stderr=fnull)
|
||
|
# NOTE(review): the original assigned multiprocessing.cpu_count() here and
# overwrote it with 1 on the very next line, so one worker process has always
# been the effective setting.  Only the effective value is kept; presumably
# the commands cannot run concurrently -- confirm before raising it.
cpucount = 1
pool = multiprocessing.Pool(processes=cpucount)
num_tasks = float(len(tasks))
# Count finished tasks from 1 so the percentage reaches 100 on the last one
# (it previously started at 0 and stopped short of 100).  A separate name is
# used so the wav counter `i` is not overwritten by the loop.
for done, _ in enumerate(pool.imap_unordered(worker, tasks), 1):
    sys.stdout.write("%f\r" % (100 * done / num_tasks))
    sys.stdout.flush()
# All results are in: shut the pool down and reap its worker processes.
pool.close()
pool.join()
|
||
|
|
||
|
# sox invocation: options before "-" describe the data read from stdin,
# options between "-" and the file name apply to the ogg output, and
# "tempo -s 2.0" is the effect applied in between.
sox_cmd = [
    "sox", "--show-progress",
    "--type", "raw", "--rate", "22050", "--encoding", "signed-integer",
    "--bits", "16", "--channels", "1", "-",
    "--rate", "22050", "--comment", "", "--compression", "0",
    "%s.ogg" % title,
    "tempo", "-s", "2.0",
]

# Three-stage pipeline: ./concatenate_wav | ./stride | sox
joiner = subprocess.Popen(["./concatenate_wav"] + wavs,
                          stdout=subprocess.PIPE)
strider = subprocess.Popen(["./stride"],
                           stdin=joiner.stdout, stdout=subprocess.PIPE)
encoder = subprocess.Popen(sox_cmd, stdin=strider.stdout, stdout=None)

# The children hold their own copies of the pipe ends; drop ours, then block
# until the final stage has finished.
joiner.stdout.close()
strider.stdout.close()
encoder.wait()
|
||
|
|
||
|
# Delete the intermediate wav files.  A file that was never produced (for
# example because its command failed) is skipped instead of raising OSError,
# which used to abort the loop and leave every remaining file behind.
# lexists() also matches dangling symlinks, which exists() would miss.
for wav in wavs:
    if os.path.lexists(wav):
        os.unlink(wav)
|