src/tests/__init__.py: use pikepdf instead of pdfrw
This commit is contained in:
parent
c808061b4b
commit
997fe8efd8
1 changed file with 47 additions and 108 deletions
|
@ -7,57 +7,11 @@ import sys
|
||||||
import zlib
|
import zlib
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from io import StringIO, BytesIO, TextIOWrapper
|
from io import StringIO, BytesIO, TextIOWrapper
|
||||||
|
import pikepdf
|
||||||
|
import decimal
|
||||||
|
|
||||||
HERE = os.path.dirname(__file__)
|
HERE = os.path.dirname(__file__)
|
||||||
|
|
||||||
PdfReaderIO = StringIO
|
|
||||||
|
|
||||||
# Recompressing the image stream makes the comparison robust against output
|
|
||||||
# preserving changes in the zlib compress output bitstream
|
|
||||||
# (e.g. between different zlib implementations/versions/releases).
|
|
||||||
# Without this, some img2pdf 0.3.2 tests fail on Fedora 29/aarch64.
|
|
||||||
# See also:
|
|
||||||
# https://gitlab.mister-muffin.de/josch/img2pdf/issues/51
|
|
||||||
# https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/R7GD4L5Z6HELCDAL2RDESWR2F3ZXHWVX/
|
|
||||||
def recompress_last_stream(bs):
    """Re-deflate the last stream of a serialized PDF and patch its sizes.

    The stream belonging to the final ``/Length`` entry in *bs* is
    decompressed and compressed again with the local zlib, so that two
    documents whose streams differ only in the deflate bitstream end up
    byte-identical.  The ``/Length`` value and the offset stored after the
    last ``startxref`` keyword are rewritten to account for the size change.

    :param bs: the complete PDF document as a bytes object
    :returns: the document with the last stream recompressed
    :raises ValueError: if ``/Length``, ``stream`` or ``startxref`` cannot
        be located, or the numbers following them cannot be parsed
    :raises zlib.error: if the located stream is not valid zlib data
    """
    # the decimal length sits between "/Length " and the next space
    len_start = bs.rindex(b'/Length') + len(b'/Length ')
    len_end = bs.index(b' ', len_start)
    old_size = int(bs[len_start:len_end])

    # the payload begins right behind the "stream" keyword line
    data_start = bs.index(b'\nstream\n', len_end) + len(b'\nstream\n')
    data_end = data_start + old_size

    # the offset recorded on the line following the last "startxref"
    xref_start = bs.rindex(b'\nstartxref\n') + len(b'\nstartxref\n')
    xref_end = bs.index(b'\n', xref_start)
    old_xref = int(bs[xref_start:xref_end])

    recompressed = zlib.compress(zlib.decompress(bs[data_start:data_end]))
    size_field = str(len(recompressed)).encode('ascii')

    # both the digits of the length field and the payload may change size,
    # which moves everything behind them (including the xref section)
    old_span = (len_end - len_start) + old_size
    new_span = len(size_field) + len(recompressed)
    xref_field = str(old_xref + (new_span - old_span)).encode('ascii')

    head = bs[:len_start] + size_field + bs[len_end:data_start]
    tail = bs[data_end:xref_start] + xref_field + bs[xref_end:]
    return head + recompressed + tail
|
|
||||||
|
|
||||||
def compare_pdf(outx, outy):
    """Tell whether two serialized PDF documents are equal.

    If *outx* contains ``/FlateDecode``, the last stream of both documents
    is recompressed first via ``recompress_last_stream`` so that differences
    in the zlib output bitstream do not count as a mismatch.  Otherwise the
    raw bytes are compared directly.  On a mismatch the compared documents
    are dumped to stderr to help debugging.

    :param outx: first PDF document as bytes
    :param outy: second PDF document as bytes
    :returns: True if the documents are considered equal, False otherwise
    """
    if b'/FlateDecode' in outx:
        x = recompress_last_stream(outx)
        y = recompress_last_stream(outy)
        if x != y:
            print('original outx:\n{}\nouty:\n{}\n'.format(outx, outy), file=sys.stderr)
            print('recompressed outx:\n{}\nouty:\n{}\n'.format(x, y), file=sys.stderr)
            return False
    elif outx != outy:
        print('original outx:\n{}\nouty:\n{}\n'.format(outx, outy), file=sys.stderr)
        # a mismatch must be reported to the caller; previously this branch
        # fell through and returned True
        return False
    return True
|
|
||||||
|
|
||||||
# convert +set date:create +set date:modify -define png:exclude-chunk=time
|
# convert +set date:create +set date:modify -define png:exclude-chunk=time
|
||||||
|
|
||||||
# we define some variables so that the table below can be narrower
|
# we define some variables so that the table below can be narrower
|
||||||
|
@ -535,34 +489,24 @@ def test_suite():
|
||||||
assert os.path.isfile(outputf)
|
assert os.path.isfile(outputf)
|
||||||
|
|
||||||
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
|
def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
|
||||||
try:
|
|
||||||
from pdfrw import PdfReader, PdfName, PdfWriter
|
|
||||||
from pdfrw.py23_diffs import convert_load, convert_store
|
|
||||||
except ImportError:
|
|
||||||
# the test requires pdfrw
|
|
||||||
self.skipTest("this test requires pdfrw")
|
|
||||||
return
|
|
||||||
with open(f, "rb") as inf:
|
with open(f, "rb") as inf:
|
||||||
orig_imgdata = inf.read()
|
orig_imgdata = inf.read()
|
||||||
output = img2pdf.convert(orig_imgdata, nodate=True,
|
output = img2pdf.convert(orig_imgdata, nodate=True,
|
||||||
with_pdfrw=with_pdfrw)
|
with_pdfrw=with_pdfrw)
|
||||||
x = PdfReader(PdfReaderIO(convert_load(output)))
|
x = pikepdf.open(BytesIO(output))
|
||||||
self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
|
self.assertIn(x.Root.Pages.Count, (1, 2))
|
||||||
PdfName.Size])
|
|
||||||
self.assertIn(x.Root.Pages.Count, ('1', '2'))
|
|
||||||
if len(x.Root.Pages.Kids) == '1':
|
if len(x.Root.Pages.Kids) == '1':
|
||||||
self.assertEqual(x.Size, '7')
|
self.assertEqual(x.Size, '7')
|
||||||
self.assertEqual(len(x.Root.Pages.Kids), 1)
|
self.assertEqual(len(x.Root.Pages.Kids), 1)
|
||||||
elif len(x.Root.Pages.Kids) == '2':
|
elif len(x.Root.Pages.Kids) == '2':
|
||||||
self.assertEqual(x.Size, '10')
|
self.assertEqual(x.Size, '10')
|
||||||
self.assertEqual(len(x.Root.Pages.Kids), 2)
|
self.assertEqual(len(x.Root.Pages.Kids), 2)
|
||||||
self.assertEqual(x.Info, {})
|
self.assertEqual(sorted(x.Root.keys()), ["/Pages",
|
||||||
self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
|
"/Type"])
|
||||||
PdfName.Type])
|
self.assertEqual(x.Root.Type, "/Catalog")
|
||||||
self.assertEqual(x.Root.Type, PdfName.Catalog)
|
|
||||||
self.assertEqual(sorted(x.Root.Pages.keys()),
|
self.assertEqual(sorted(x.Root.Pages.keys()),
|
||||||
[PdfName.Count, PdfName.Kids, PdfName.Type])
|
["/Count", "/Kids", "/Type"])
|
||||||
self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
|
self.assertEqual(x.Root.Pages.Type, "/Pages")
|
||||||
orig_img = Image.open(f)
|
orig_img = Image.open(f)
|
||||||
for pagenum in range(len(x.Root.Pages.Kids)):
|
for pagenum in range(len(x.Root.Pages.Kids)):
|
||||||
# retrieve the original image frame that this page was
|
# retrieve the original image frame that this page was
|
||||||
|
@ -583,57 +527,54 @@ def test_suite():
|
||||||
|
|
||||||
def format_float(f):
|
def format_float(f):
|
||||||
if int(f) == f:
|
if int(f) == f:
|
||||||
return str(int(f))
|
return int(f)
|
||||||
else:
|
else:
|
||||||
return ("%.4f" % f).rstrip("0")
|
return decimal.Decimal("%.4f" % f)
|
||||||
|
|
||||||
self.assertEqual(sorted(cur_page.keys()),
|
self.assertEqual(sorted(cur_page.keys()),
|
||||||
[PdfName.Contents, PdfName.MediaBox,
|
["/Contents", "/MediaBox",
|
||||||
PdfName.Parent, PdfName.Resources,
|
"/Parent", "/Resources",
|
||||||
PdfName.Type])
|
"/Type"])
|
||||||
self.assertEqual(cur_page.MediaBox,
|
self.assertEqual(cur_page.MediaBox,
|
||||||
['0', '0', format_float(pagewidth),
|
pikepdf.Array([0, 0, format_float(pagewidth),
|
||||||
format_float(pageheight)])
|
format_float(pageheight)]))
|
||||||
self.assertEqual(cur_page.Parent, x.Root.Pages)
|
self.assertEqual(cur_page.Parent, x.Root.Pages)
|
||||||
self.assertEqual(cur_page.Type, PdfName.Page)
|
self.assertEqual(cur_page.Type, "/Page")
|
||||||
self.assertEqual(cur_page.Resources.keys(),
|
self.assertEqual(cur_page.Resources.keys(),
|
||||||
[PdfName.XObject])
|
{"/XObject"})
|
||||||
self.assertEqual(cur_page.Resources.XObject.keys(),
|
self.assertEqual(cur_page.Resources.XObject.keys(),
|
||||||
[PdfName.Im0])
|
{"/Im0"})
|
||||||
self.assertEqual(cur_page.Contents.keys(),
|
|
||||||
[PdfName.Length])
|
|
||||||
self.assertEqual(cur_page.Contents.Length,
|
self.assertEqual(cur_page.Contents.Length,
|
||||||
str(len(cur_page.Contents.stream)))
|
len(cur_page.Contents.read_bytes()))
|
||||||
self.assertEqual(cur_page.Contents.stream,
|
self.assertEqual(cur_page.Contents.read_bytes(),
|
||||||
"q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
|
b"q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
|
||||||
"/Im0 Do\nQ" % (pagewidth, pageheight))
|
b"/Im0 Do\nQ" % (pagewidth, pageheight))
|
||||||
|
|
||||||
imgprops = cur_page.Resources.XObject.Im0
|
imgprops = cur_page.Resources.XObject.Im0
|
||||||
|
|
||||||
# test if the filter is valid:
|
# test if the filter is valid:
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode,
|
imgprops.Filter, ["/DCTDecode", "/JPXDecode",
|
||||||
PdfName.FlateDecode,
|
"/FlateDecode",
|
||||||
[PdfName.CCITTFaxDecode]])
|
pikepdf.Array([ pikepdf.Name.CCITTFaxDecode ])])
|
||||||
|
|
||||||
# test if the image has correct size
|
# test if the image has correct size
|
||||||
self.assertEqual(imgprops.Width, str(orig_img.size[0]))
|
self.assertEqual(imgprops.Width, orig_img.size[0])
|
||||||
self.assertEqual(imgprops.Height, str(orig_img.size[1]))
|
self.assertEqual(imgprops.Height, orig_img.size[1])
|
||||||
# if the input file is a jpeg then it should've been copied
|
# if the input file is a jpeg then it should've been copied
|
||||||
# verbatim into the PDF
|
# verbatim into the PDF
|
||||||
if imgprops.Filter in [PdfName.DCTDecode,
|
if imgprops.Filter in ["/DCTDecode",
|
||||||
PdfName.JPXDecode]:
|
"/JPXDecode"]:
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
cur_page.Resources.XObject.Im0.stream,
|
cur_page.Resources.XObject.Im0.read_raw_bytes(),
|
||||||
convert_load(orig_imgdata))
|
orig_imgdata)
|
||||||
elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
|
elif imgprops.Filter == pikepdf.Array([ pikepdf.Name.CCITTFaxDecode ]):
|
||||||
tiff_header = tiff_header_for_ccitt(
|
tiff_header = tiff_header_for_ccitt(
|
||||||
int(imgprops.Width), int(imgprops.Height),
|
int(imgprops.Width), int(imgprops.Height),
|
||||||
int(imgprops.Length), 4)
|
int(imgprops.Length), 4)
|
||||||
imgio = BytesIO()
|
imgio = BytesIO()
|
||||||
imgio.write(tiff_header)
|
imgio.write(tiff_header)
|
||||||
imgio.write(convert_store(
|
imgio.write(cur_page.Resources.XObject.Im0.read_raw_bytes())
|
||||||
cur_page.Resources.XObject.Im0.stream))
|
|
||||||
imgio.seek(0)
|
imgio.seek(0)
|
||||||
im = Image.open(imgio)
|
im = Image.open(imgio)
|
||||||
self.assertEqual(im.tobytes(), orig_img.tobytes())
|
self.assertEqual(im.tobytes(), orig_img.tobytes())
|
||||||
|
@ -641,13 +582,12 @@ def test_suite():
|
||||||
im.close()
|
im.close()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
|
elif imgprops.Filter == "/FlateDecode":
|
||||||
elif imgprops.Filter == PdfName.FlateDecode:
|
|
||||||
# otherwise, the data is flate encoded and has to be equal
|
# otherwise, the data is flate encoded and has to be equal
|
||||||
# to the pixel data of the input image
|
# to the pixel data of the input image
|
||||||
imgdata = zlib.decompress(
|
imgdata = zlib.decompress(
|
||||||
convert_store(cur_page.Resources.XObject.Im0.stream))
|
cur_page.Resources.XObject.Im0.read_raw_bytes())
|
||||||
if imgprops.DecodeParms:
|
if hasattr(imgprops, "DecodeParms"):
|
||||||
if orig_img.format == 'PNG':
|
if orig_img.format == 'PNG':
|
||||||
pngidat, palette = img2pdf.parse_png(orig_imgdata)
|
pngidat, palette = img2pdf.parse_png(orig_imgdata)
|
||||||
elif orig_img.format == 'TIFF' \
|
elif orig_img.format == 'TIFF' \
|
||||||
|
@ -664,11 +604,11 @@ def test_suite():
|
||||||
self.assertEqual(zlib.decompress(pngidat), imgdata)
|
self.assertEqual(zlib.decompress(pngidat), imgdata)
|
||||||
else:
|
else:
|
||||||
colorspace = imgprops.ColorSpace
|
colorspace = imgprops.ColorSpace
|
||||||
if colorspace == PdfName.DeviceGray:
|
if colorspace == "/DeviceGray":
|
||||||
colorspace = 'L'
|
colorspace = 'L'
|
||||||
elif colorspace == PdfName.DeviceRGB:
|
elif colorspace == "/DeviceRGB":
|
||||||
colorspace = 'RGB'
|
colorspace = 'RGB'
|
||||||
elif colorspace == PdfName.DeviceCMYK:
|
elif colorspace == "/DeviceCMYK":
|
||||||
colorspace = 'CMYK'
|
colorspace = 'CMYK'
|
||||||
else:
|
else:
|
||||||
raise Exception("invalid colorspace")
|
raise Exception("invalid colorspace")
|
||||||
|
@ -689,18 +629,17 @@ def test_suite():
|
||||||
im.close()
|
im.close()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
|
else:
|
||||||
|
raise Exception("unknown filter")
|
||||||
|
|
||||||
# now use pdfrw to parse and then write out both pdfs and check the
|
# now use pikepdf to parse and then write out both pdfs and check the
|
||||||
# result for equality
|
# result for equality
|
||||||
y = PdfReader(out)
|
y = pikepdf.open(out)
|
||||||
outx = BytesIO()
|
outx = BytesIO()
|
||||||
outy = BytesIO()
|
outy = BytesIO()
|
||||||
xwriter = PdfWriter()
|
x.save(outx, compress_streams = False, static_id=True)
|
||||||
ywriter = PdfWriter()
|
y.save(outy, compress_streams = False, static_id=True)
|
||||||
xwriter.trailer = x
|
self.assertEqual(outx.getvalue(), outy.getvalue())
|
||||||
ywriter.trailer = y
|
|
||||||
xwriter.write(outx)
|
|
||||||
ywriter.write(outy)
|
|
||||||
self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True)
|
|
||||||
# the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
|
# the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
|
||||||
# close() method
|
# close() method
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in a new issue