src/tests/__init__.py: recursively convert both PDFs into Python data structures and then compare for equality -- this allows comparing PDFs in which streams are compressed differently

This commit is contained in:
Johannes 'josch' Schauer 2020-05-31 09:05:55 +02:00
parent f4b296cef3
commit 1cd2674a2c
Signed by: josch
GPG key ID: F2CBA5C78FBD83E1

View file

@ -634,14 +634,30 @@ def test_suite():
else: else:
raise Exception("unknown filter") raise Exception("unknown filter")
def rec(obj):
    """Recursively turn a pikepdf object into plain Python data.

    The result is built only from dicts, lists, strings, bytes and
    ``decimal.Decimal`` values so that two parsed PDFs can be compared
    with ``==`` even when their streams are compressed differently.

    * Dictionaries become ``dict``; the ``"/Parent"`` key is left out.
    * Arrays become ``list``.
    * Streams become the converted stream dictionary without
      ``"/Length"``, plus a ``"stream"`` entry holding the payload.
      When ``"/Filter"`` is exactly ``"/FlateDecode"`` the decoded
      bytes are stored and ``"/Filter"`` is removed; otherwise the raw
      bytes are stored.
    * Names and Strings become ``str``.
    * ``decimal.Decimal`` and ``str`` values are returned unchanged.
    * ``int`` values are widened to ``decimal.Decimal``.

    Raises ``Exception`` for any other type.
    """
    if isinstance(obj, pikepdf.Dictionary):
        converted = {}
        for key, value in obj.items():
            # NOTE(review): presumably skipped so the walk never follows
            # a back-reference to an ancestor node -- confirm.
            if key == "/Parent":
                continue
            converted[key] = rec(value)
        return converted

    if isinstance(obj, pikepdf.Array):
        return list(map(rec, obj))

    if isinstance(obj, pikepdf.Stream):
        result = rec(obj.stream_dict)
        payload = obj.read_raw_bytes()
        # sanity check: the declared length must match the raw payload
        assert len(payload) == result["/Length"]
        del result["/Length"]
        if result.get("/Filter") == "/FlateDecode":
            # compare decoded content so the compression level is irrelevant
            payload = obj.read_bytes()
            del result["/Filter"]
        result["stream"] = payload
        return result

    if isinstance(obj, (pikepdf.Name, pikepdf.String)):
        return str(obj)

    if isinstance(obj, (decimal.Decimal, str)):
        return obj

    if isinstance(obj, int):
        # normalise integers so they compare equal to Decimal numbers
        return decimal.Decimal(obj)

    raise Exception("unhandled: %s" % (type(obj)))
y = pikepdf.open(out) y = pikepdf.open(out)
outx = BytesIO() self.assertEqual(rec(x.Root), rec(y.Root))
outy = BytesIO()
x.save(outx, compress_streams = False, static_id=True)
y.save(outy, compress_streams = False, static_id=True)
self.assertEqual(outx.getvalue(), outy.getvalue())
# the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
# close() method # close() method
try: try: