From 997fe8efd8c629d2dcbb943be3e3db747f0e2a1d Mon Sep 17 00:00:00 2001
From: Johannes 'josch' Schauer <josch@mister-muffin.de>
Date: Thu, 28 May 2020 12:43:08 +0200
Subject: [PATCH] src/tests/__init__.py: use pikepdf instead of pdfrw

---
 src/tests/__init__.py | 155 +++++++++++++-----------------------------
 1 file changed, 47 insertions(+), 108 deletions(-)

diff --git a/src/tests/__init__.py b/src/tests/__init__.py
index 7997218..4132e09 100644
--- a/src/tests/__init__.py
+++ b/src/tests/__init__.py
@@ -7,57 +7,11 @@ import sys
 import zlib
 from PIL import Image
 from io import StringIO, BytesIO, TextIOWrapper
+import pikepdf
+import decimal
 
 HERE = os.path.dirname(__file__)
 
-PdfReaderIO = StringIO
-
-# Recompressing the image stream makes the comparison robust against output
-# preserving changes in the zlib compress output bitstream
-# (e.g. between different zlib implementations/versions/releases).
-# Without this, some img2pdf 0.3.2 tests fail on Fedora 29/aarch64.
-# See also:
-# https://gitlab.mister-muffin.de/josch/img2pdf/issues/51
-# https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/R7GD4L5Z6HELCDAL2RDESWR2F3ZXHWVX/
-def recompress_last_stream(bs):
-    length_pos = bs.rindex(b'/Length')
-    li = length_pos + 8
-    lj = bs.index(b' ', li)
-    n = int(bs[li:lj])
-    stream_pos = bs.index(b'\nstream\n', lj)
-    si = stream_pos + 8
-    sj = si + n
-    startx_pos = bs.rindex(b'\nstartxref\n')
-    xi = startx_pos + 11
-    xj = bs.index(b'\n', xi)
-    m = int(bs[xi:xj])
-
-    unc_t = zlib.decompress(bs[si:sj])
-    t = zlib.compress(unc_t)
-
-    new_len = str(len(t)).encode('ascii')
-    u = (lj-li) + n
-    v = len(new_len) + len(t)
-    off = v - u
-
-    rs = (bs[:li] + new_len + bs[lj:si] + t + bs[sj:xi]
-            + str(m+off).encode('ascii') + bs[xj:])
-
-    return rs
-
-def compare_pdf(outx, outy):
-    if b'/FlateDecode' in outx:
-        x = recompress_last_stream(outx)
-        y = recompress_last_stream(outy)
-        if x != y:
-            print('original outx:\n{}\nouty:\n{}\n'.format(outx, outy), file=sys.stderr)
-            print('recompressed outx:\n{}\nouty:\n{}\n'.format(x, y), file=sys.stderr)
-            return False
-    else:
-        if outx != outy:
-            print('original outx:\n{}\nouty:\n{}\n'.format(outx, outy), file=sys.stderr)
-    return True
-
 # convert +set date:create +set date:modify -define png:exclude-chunk=time
 
 # we define some variables so that the table below can be narrower
@@ -535,34 +489,24 @@ def test_suite():
         assert os.path.isfile(outputf)
 
         def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw):
-            try:
-                from pdfrw import PdfReader, PdfName, PdfWriter
-                from pdfrw.py23_diffs import convert_load, convert_store
-            except ImportError:
-                # the test requires pdfrw
-                self.skipTest("this test requires pdfrw")
-                return
             with open(f, "rb") as inf:
                 orig_imgdata = inf.read()
             output = img2pdf.convert(orig_imgdata, nodate=True,
                                      with_pdfrw=with_pdfrw)
-            x = PdfReader(PdfReaderIO(convert_load(output)))
-            self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root,
-                             PdfName.Size])
-            self.assertIn(x.Root.Pages.Count, ('1', '2'))
+            x = pikepdf.open(BytesIO(output))
+            self.assertIn(x.Root.Pages.Count, (1, 2))
             if len(x.Root.Pages.Kids) == '1':
                 self.assertEqual(x.Size, '7')
                 self.assertEqual(len(x.Root.Pages.Kids), 1)
             elif len(x.Root.Pages.Kids) == '2':
                 self.assertEqual(x.Size, '10')
                 self.assertEqual(len(x.Root.Pages.Kids), 2)
-            self.assertEqual(x.Info, {})
-            self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages,
-                                                     PdfName.Type])
-            self.assertEqual(x.Root.Type, PdfName.Catalog)
+            self.assertEqual(sorted(x.Root.keys()), ["/Pages",
+                                                     "/Type"])
+            self.assertEqual(x.Root.Type, "/Catalog")
             self.assertEqual(sorted(x.Root.Pages.keys()),
-                             [PdfName.Count, PdfName.Kids, PdfName.Type])
-            self.assertEqual(x.Root.Pages.Type, PdfName.Pages)
+                             ["/Count", "/Kids", "/Type"])
+            self.assertEqual(x.Root.Pages.Type, "/Pages")
             orig_img = Image.open(f)
             for pagenum in range(len(x.Root.Pages.Kids)):
                 # retrieve the original image frame that this page was
@@ -583,57 +527,54 @@ def test_suite():
 
                 def format_float(f):
                     if int(f) == f:
-                        return str(int(f))
+                        return int(f)
                     else:
-                        return ("%.4f" % f).rstrip("0")
+                        return decimal.Decimal("%.4f" % f)
 
                 self.assertEqual(sorted(cur_page.keys()),
-                                 [PdfName.Contents, PdfName.MediaBox,
-                                  PdfName.Parent, PdfName.Resources,
-                                  PdfName.Type])
+                                 ["/Contents", "/MediaBox",
+                                  "/Parent", "/Resources",
+                                  "/Type"])
                 self.assertEqual(cur_page.MediaBox,
-                                 ['0', '0', format_float(pagewidth),
-                                  format_float(pageheight)])
+                                 pikepdf.Array([0, 0, format_float(pagewidth),
+                                  format_float(pageheight)]))
                 self.assertEqual(cur_page.Parent, x.Root.Pages)
-                self.assertEqual(cur_page.Type, PdfName.Page)
+                self.assertEqual(cur_page.Type, "/Page")
                 self.assertEqual(cur_page.Resources.keys(),
-                                 [PdfName.XObject])
+                        {"/XObject"})
                 self.assertEqual(cur_page.Resources.XObject.keys(),
-                                 [PdfName.Im0])
-                self.assertEqual(cur_page.Contents.keys(),
-                                 [PdfName.Length])
+                        {"/Im0"})
                 self.assertEqual(cur_page.Contents.Length,
-                                 str(len(cur_page.Contents.stream)))
-                self.assertEqual(cur_page.Contents.stream,
-                                 "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
-                                 "/Im0 Do\nQ" % (pagewidth, pageheight))
+                                 len(cur_page.Contents.read_bytes()))
+                self.assertEqual(cur_page.Contents.read_bytes(),
+                                 b"q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n"
+                                 b"/Im0 Do\nQ" % (pagewidth, pageheight))
 
                 imgprops = cur_page.Resources.XObject.Im0
 
                 # test if the filter is valid:
                 self.assertIn(
-                    imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode,
-                                      PdfName.FlateDecode,
-                                      [PdfName.CCITTFaxDecode]])
+                    imgprops.Filter, ["/DCTDecode", "/JPXDecode",
+                                      "/FlateDecode",
+                                      pikepdf.Array([ pikepdf.Name.CCITTFaxDecode ])])
 
                 # test if the image has correct size
-                self.assertEqual(imgprops.Width, str(orig_img.size[0]))
-                self.assertEqual(imgprops.Height, str(orig_img.size[1]))
+                self.assertEqual(imgprops.Width, orig_img.size[0])
+                self.assertEqual(imgprops.Height, orig_img.size[1])
                 # if the input file is a jpeg then it should've been copied
                 # verbatim into the PDF
-                if imgprops.Filter in [PdfName.DCTDecode,
-                                       PdfName.JPXDecode]:
+                if imgprops.Filter in ["/DCTDecode",
+                                       "/JPXDecode"]:
                     self.assertEqual(
-                        cur_page.Resources.XObject.Im0.stream,
-                        convert_load(orig_imgdata))
-                elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
+                        cur_page.Resources.XObject.Im0.read_raw_bytes(),
+                        orig_imgdata)
+                elif imgprops.Filter == pikepdf.Array([ pikepdf.Name.CCITTFaxDecode ]):
                     tiff_header = tiff_header_for_ccitt(
                         int(imgprops.Width), int(imgprops.Height),
                         int(imgprops.Length), 4)
                     imgio = BytesIO()
                     imgio.write(tiff_header)
-                    imgio.write(convert_store(
-                        cur_page.Resources.XObject.Im0.stream))
+                    imgio.write(cur_page.Resources.XObject.Im0.read_raw_bytes())
                     imgio.seek(0)
                     im = Image.open(imgio)
                     self.assertEqual(im.tobytes(), orig_img.tobytes())
@@ -641,13 +582,12 @@ def test_suite():
                         im.close()
                     except AttributeError:
                         pass
-
-                elif imgprops.Filter == PdfName.FlateDecode:
+                elif imgprops.Filter == "/FlateDecode":
                     # otherwise, the data is flate encoded and has to be equal
                     # to the pixel data of the input image
                     imgdata = zlib.decompress(
-                        convert_store(cur_page.Resources.XObject.Im0.stream))
-                    if imgprops.DecodeParms:
+                        cur_page.Resources.XObject.Im0.read_raw_bytes())
+                    if hasattr(imgprops, "DecodeParms"):
                         if orig_img.format == 'PNG':
                             pngidat, palette = img2pdf.parse_png(orig_imgdata)
                         elif orig_img.format == 'TIFF' \
@@ -664,11 +604,11 @@ def test_suite():
                         self.assertEqual(zlib.decompress(pngidat), imgdata)
                     else:
                         colorspace = imgprops.ColorSpace
-                        if colorspace == PdfName.DeviceGray:
+                        if colorspace == "/DeviceGray":
                             colorspace = 'L'
-                        elif colorspace == PdfName.DeviceRGB:
+                        elif colorspace == "/DeviceRGB":
                             colorspace = 'RGB'
-                        elif colorspace == PdfName.DeviceCMYK:
+                        elif colorspace == "/DeviceCMYK":
                             colorspace = 'CMYK'
                         else:
                             raise Exception("invalid colorspace")
@@ -689,18 +629,17 @@ def test_suite():
                             im.close()
                         except AttributeError:
                             pass
+                else:
+                    raise Exception("unknown filter")
+
             # now use pdfrw to parse and then write out both pdfs and check the
             # result for equality
-            y = PdfReader(out)
+            y = pikepdf.open(out)
             outx = BytesIO()
             outy = BytesIO()
-            xwriter = PdfWriter()
-            ywriter = PdfWriter()
-            xwriter.trailer = x
-            ywriter.trailer = y
-            xwriter.write(outx)
-            ywriter.write(outy)
-            self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True)
+            x.save(outx, compress_streams = False, static_id=True)
+            y.save(outy, compress_streams = False, static_id=True)
+            self.assertEqual(outx.getvalue(), outy.getvalue())
             # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
             # close() method
             try: