From 1cd2674a2cb7820288087a6f110a404220f3bb61 Mon Sep 17 00:00:00 2001
From: Johannes 'josch' Schauer
Date: Sun, 31 May 2020 09:05:55 +0200
Subject: [PATCH] src/tests/__init__.py: recursively convert both PDFs into Python data structures and then compare for equality -- this allows comparing PDFs in which streams are compressed differently

---
NOTE(review): the line breaks and indentation of this patch were
reconstructed from a copy in which all whitespace had been collapsed onto
one line. The relative nesting follows from the Python syntax; the absolute
depth (12 spaces for the enclosing body) is an assumption and must be
checked against src/tests/__init__.py at a565b8a before applying. The
author e-mail address is also missing from the From: header of this copy.

What the added helper rec(obj) does:
  * pikepdf.Dictionary -> dict of recursively converted values, skipping
    the "/Parent" key (presumably to avoid recursing back up the page
    tree -- confirm).
  * pikepdf.Array      -> list of recursively converted items.
  * pikepdf.Stream     -> the converted stream dictionary with "/Length"
    checked against len(read_raw_bytes()) and then removed; when "/Filter"
    is "/FlateDecode" the payload is taken from read_bytes() and "/Filter"
    is removed as well; the payload is stored under the key "stream".
  * pikepdf.Name / pikepdf.String -> str(obj).
  * decimal.Decimal / str         -> returned unchanged.
  * int                           -> decimal.Decimal(obj).
  * anything else raises Exception("unhandled: <type>").
The test then asserts rec(x.Root) == rec(y.Root) where it previously
compared the bytes of two uncompressed re-saves.

 src/tests/__init__.py | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/tests/__init__.py b/src/tests/__init__.py
index a565b8a..ac55cef 100644
--- a/src/tests/__init__.py
+++ b/src/tests/__init__.py
@@ -634,14 +634,30 @@ def test_suite():
             else:
                 raise Exception("unknown filter")
 
-            # now use pdfrw to parse and then write out both pdfs and check the
-            # result for equality
+            def rec(obj):
+                if isinstance(obj, pikepdf.Dictionary):
+                    return {k:rec(v) for k,v in obj.items() if k != "/Parent"}
+                elif isinstance(obj, pikepdf.Array):
+                    return [rec(v) for v in obj]
+                elif isinstance(obj, pikepdf.Stream):
+                    ret = rec(obj.stream_dict)
+                    stream = obj.read_raw_bytes()
+                    assert len(stream) == ret["/Length"]
+                    del ret["/Length"]
+                    if ret.get("/Filter") == '/FlateDecode':
+                        stream = obj.read_bytes()
+                        del ret["/Filter"]
+                    ret["stream"] = stream
+                    return ret
+                elif isinstance(obj, pikepdf.Name) or isinstance(obj, pikepdf.String):
+                    return str(obj)
+                elif isinstance(obj, decimal.Decimal) or isinstance(obj, str):
+                    return obj
+                elif isinstance(obj, int):
+                    return decimal.Decimal(obj)
+                raise Exception("unhandled: %s"%(type(obj)))
             y = pikepdf.open(out)
-            outx = BytesIO()
-            outy = BytesIO()
-            x.save(outx, compress_streams = False, static_id=True)
-            y.save(outy, compress_streams = False, static_id=True)
-            self.assertEqual(outx.getvalue(), outy.getvalue())
+            self.assertEqual(rec(x.Root), rec(y.Root))
             # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
             # close() method
             try: