diff --git a/src/tests/__init__.py b/src/tests/__init__.py index 506fc48..d0f32e3 100644 --- a/src/tests/__init__.py +++ b/src/tests/__init__.py @@ -476,101 +476,127 @@ def test_suite(): x = PdfReader(StringIO(convert_load(output))) self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root, PdfName.Size]) - self.assertEqual(x.Size, '7') + self.assertIn(x.Root.Pages.Count, ('1', '2')) + if len(x.Root.Pages.Kids) == '1': + self.assertEqual(x.Size, '7') + self.assertEqual(len(x.Root.Pages.Kids), 1) + elif len(x.Root.Pages.Kids) == '2': + self.assertEqual(x.Size, '10') + self.assertEqual(len(x.Root.Pages.Kids), 2) self.assertEqual(x.Info, {}) self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type]) self.assertEqual(x.Root.Type, PdfName.Catalog) self.assertEqual(sorted(x.Root.Pages.keys()), [PdfName.Count, PdfName.Kids, PdfName.Type]) - self.assertEqual(x.Root.Pages.Count, '1') self.assertEqual(x.Root.Pages.Type, PdfName.Pages) - self.assertEqual(len(x.Root.Pages.Kids), 1) - self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()), - [PdfName.Contents, PdfName.MediaBox, - PdfName.Parent, PdfName.Resources, PdfName.Type]) - self.assertEqual(x.Root.Pages.Kids[0].MediaBox, - ['0', '0', '115', '48']) - self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages) - self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page) - self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(), - [PdfName.XObject]) - self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(), - [PdfName.Im0]) - self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(), - [PdfName.Length]) - self.assertEqual(x.Root.Pages.Kids[0].Contents.Length, - str(len(x.Root.Pages.Kids[0].Contents.stream))) - self.assertEqual(x.Root.Pages.Kids[0].Contents.stream, - "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 " - "Do\nQ") + orig_img = Image.open(f) + for pagenum in range(len(x.Root.Pages.Kids)): + # retrieve the original image frame that this page was + # generated from + orig_img.seek(pagenum) - imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0 + ndpi = orig_img.info.get("dpi", (96.0, 96.0)) + # In python3, the returned dpi value for some tiff images will + # not be an integer but a float. To make the behaviour of + # img2pdf the same between python2 and python3, we convert that + # float into an integer by rounding. + # Search online for the 72.009 dpi problem for more info. + ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) + imgwidthpx, imgheightpx = orig_img.size + pagewidth = 72*imgwidthpx/ndpi[0] + pageheight = 72*imgheightpx/ndpi[1] - # test if the filter is valid: - self.assertIn( - imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode], - [PdfName.FlateDecode], - [PdfName.CCITTFaxDecode]]) - # test if the colorspace is valid - self.assertIn( - imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB, - PdfName.DeviceCMYK]) - # test if the image has correct size - orig_img = Image.open(f) - self.assertEqual(imgprops.Width, str(orig_img.size[0])) - self.assertEqual(imgprops.Height, str(orig_img.size[1])) - # if the input file is a jpeg then it should've been copied - # verbatim into the PDF - if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]: - self.assertEqual( - x.Root.Pages.Kids[0].Resources.XObject.Im0.stream, - convert_load(orig_imgdata)) - elif imgprops.Filter == [PdfName.CCITTFaxDecode]: - tiff_header = tiff_header_for_ccitt( - int(imgprops.Width), int(imgprops.Height), - int(imgprops.Length), 4) - imgio = BytesIO() - imgio.write(tiff_header) - imgio.write(convert_store( - x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) - imgio.seek(0) - im = Image.open(imgio) - self.assertEqual(im.tobytes(), orig_img.tobytes()) - try: - im.close() - except AttributeError: - pass + def format_float(f): + if int(f) == f: + return str(int(f)) + else: + return ("%.4f" % f).rstrip("0") + + self.assertEqual(sorted(x.Root.Pages.Kids[pagenum].keys()), + [PdfName.Contents, PdfName.MediaBox, + PdfName.Parent, PdfName.Resources, PdfName.Type]) + self.assertEqual(x.Root.Pages.Kids[pagenum].MediaBox, + ['0', '0', format_float(pagewidth), format_float(pageheight)]) + self.assertEqual(x.Root.Pages.Kids[pagenum].Parent, x.Root.Pages) + self.assertEqual(x.Root.Pages.Kids[pagenum].Type, PdfName.Page) + self.assertEqual(x.Root.Pages.Kids[pagenum].Resources.keys(), + [PdfName.XObject]) + self.assertEqual(x.Root.Pages.Kids[pagenum].Resources.XObject.keys(), + [PdfName.Im0]) + self.assertEqual(x.Root.Pages.Kids[pagenum].Contents.keys(), + [PdfName.Length]) + self.assertEqual(x.Root.Pages.Kids[pagenum].Contents.Length, + str(len(x.Root.Pages.Kids[pagenum].Contents.stream))) + self.assertEqual(x.Root.Pages.Kids[pagenum].Contents.stream, + "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n" + "/Im0 Do\nQ" % (pagewidth, pageheight)) + + imgprops = x.Root.Pages.Kids[pagenum].Resources.XObject.Im0 + + # test if the filter is valid: + self.assertIn( + imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode], + [PdfName.FlateDecode], + [PdfName.CCITTFaxDecode]]) + # test if the colorspace is valid + self.assertIn( + imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB, + PdfName.DeviceCMYK]) + + # test if the image has correct size + self.assertEqual(imgprops.Width, str(orig_img.size[0])) + self.assertEqual(imgprops.Height, str(orig_img.size[1])) + # if the input file is a jpeg then it should've been copied + # verbatim into the PDF + if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]: + self.assertEqual( + x.Root.Pages.Kids[pagenum].Resources.XObject.Im0.stream, + convert_load(orig_imgdata)) + elif imgprops.Filter == [PdfName.CCITTFaxDecode]: + tiff_header = tiff_header_for_ccitt( + int(imgprops.Width), int(imgprops.Height), + int(imgprops.Length), 4) + imgio = BytesIO() + imgio.write(tiff_header) + imgio.write(convert_store( + x.Root.Pages.Kids[pagenum].Resources.XObject.Im0.stream)) + imgio.seek(0) + im = Image.open(imgio) + self.assertEqual(im.tobytes(), orig_img.tobytes()) + try: + im.close() + except AttributeError: + pass - elif imgprops.Filter == [PdfName.FlateDecode]: - # otherwise, the data is flate encoded and has to be equal to - # the pixel data of the input image - imgdata = zlib.decompress( - convert_store( - x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) - colorspace = imgprops.ColorSpace - if colorspace == PdfName.DeviceGray: - colorspace = 'L' - elif colorspace == PdfName.DeviceRGB: - colorspace = 'RGB' - elif colorspace == PdfName.DeviceCMYK: - colorspace = 'CMYK' - else: - raise Exception("invalid colorspace") - im = Image.frombytes(colorspace, (int(imgprops.Width), - int(imgprops.Height)), - imgdata) - if orig_img.mode == '1': - orig_img = orig_img.convert("L") - elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): - orig_img = orig_img.convert("RGB") - self.assertEqual(im.tobytes(), orig_img.tobytes()) - # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have - # the close() method - try: - im.close() - except AttributeError: - pass + elif imgprops.Filter == [PdfName.FlateDecode]: + # otherwise, the data is flate encoded and has to be equal to + # the pixel data of the input image + imgdata = zlib.decompress( + convert_store( + x.Root.Pages.Kids[pagenum].Resources.XObject.Im0.stream)) + colorspace = imgprops.ColorSpace + if colorspace == PdfName.DeviceGray: + colorspace = 'L' + elif colorspace == PdfName.DeviceRGB: + colorspace = 'RGB' + elif colorspace == PdfName.DeviceCMYK: + colorspace = 'CMYK' + else: + raise Exception("invalid colorspace") + im = Image.frombytes(colorspace, (int(imgprops.Width), + int(imgprops.Height)), + imgdata) + if orig_img.mode == '1': + self.assertEqual(im.tobytes(), orig_img.convert("L").tobytes()) + elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): + self.assertEqual(im.tobytes(), orig_img.convert("RGB").tobytes()) + # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have + # the close() method + try: + im.close() + except AttributeError: + pass # now use pdfrw to parse and then write out both pdfs and check the # result for equality y = PdfReader(out)