From a8269391e9b57f5b9a66535c0e5101347169b1d0 Mon Sep 17 00:00:00 2001
From: "James R. Barlow"
Date: Fri, 28 Jul 2017 13:52:33 -0700
Subject: [PATCH] Implement /UserUnit scaling to support oversized PDFs

The original PDF specification supported a maximum of 200x200" pages or
14400 PDF units. In PDF 1.6 rather than remove this limitation, Adobe
added the /UserUnit field for pages, which allows one to specify the
scaling that should be applied for user-facing numbers, while keeping the
internal limit of 14400 units.

Many real-world designs are larger than 200" in one direction. One example
is tractor feed or rolled paper which may be easier to scan in one
continuous run rather than segment into pages.

/UserUnit is independent of the pixel size and resolution of the image.

/UserUnit can also indicate very small page sizes but this is not
implemented here.
---
 src/img2pdf.py        | 34 ++++++++++++++++++++++++++++++----
 src/tests/__init__.py |  3 +++
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/img2pdf.py b/src/img2pdf.py
index 2ac4b16..4cac4ed 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -367,7 +367,7 @@ class pdfdoc(object):
 
     def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata,
                       imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth,
-                      pageheight):
+                      pageheight, userunit=None):
         if self.with_pdfrw:
             from pdfrw import PdfDict, PdfName, PdfObject
             from pdfrw.py23_diffs import convert_load
@@ -436,6 +436,11 @@ class pdfdoc(object):
         page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight]
         page[PdfName.Resources] = resources
         page[PdfName.Contents] = content
+        if userunit is not None:
+            # /UserUnit requires PDF 1.6
+            if self.writer.version < '1.6':
+                self.writer.version = '1.6'
+            page[PdfName.UserUnit] = userunit
 
         self.writer.addpage(page)
 
@@ -984,6 +989,17 @@ def get_fixed_dpi_layout_fun(fixed_dpi):
     return fixed_dpi_layout_fun
 
 
+def find_scale(pagewidth, pageheight):
+    """Find the power of 10 (10, 100, 1000...) that will reduce the scale
+    below the PDF specification limit of 14400 PDF units (=200 inches)"""
+    from math import log10, ceil
+
+    major = max(pagewidth, pageheight)
+    oversized = major / 14400.0
+
+    return 10 ** ceil(log10(oversized))
+
+
 # given one or more input image, depending on outputstream, either return a
 # string containing the whole PDF if outputstream is None or write the PDF
 # data to the given file-like object and return None
@@ -1001,7 +1017,8 @@ def convert(*images, **kwargs):
         viewer_initial_page=None, viewer_magnification=None,
         viewer_page_layout=None, viewer_fit_window=False,
         viewer_center_window=False, viewer_fullscreen=False,
-        with_pdfrw=True, outputstream=None, first_frame_only=False)
+        with_pdfrw=True, outputstream=None, first_frame_only=False,
+        allow_oversized=True)
     for kwname, default in _default_kwargs.items():
         if kwname not in kwargs:
             kwargs[kwname] = default
@@ -1051,18 +1068,27 @@ def convert(*images, **kwargs):
                     rawdata, kwargs['colorspace'], kwargs['first_frame_only']):
             pagewidth, pageheight, imgwidthpdf, imgheightpdf = \
                 kwargs['layout_fun'](imgwidthpx, imgheightpx, ndpi)
+
+            userunit = None
             if pagewidth < 3.00 or pageheight < 3.00:
                 logging.warning("pdf width or height is below 3.00 - too "
                                 "small for some viewers!")
             elif pagewidth > 14400.0 or pageheight > 14400.0:
-                raise PdfTooLargeError(
+                if kwargs['allow_oversized']:
+                    userunit = find_scale(pagewidth, pageheight)
+                    pagewidth /= userunit
+                    pageheight /= userunit
+                    imgwidthpdf /= userunit
+                    imgheightpdf /= userunit
+                else:
+                    raise PdfTooLargeError(
                         "pdf width or height must not exceed 200 inches.")
             # the image is always centered on the page
             imgxpdf = (pagewidth - imgwidthpdf)/2.0
             imgypdf = (pageheight - imgheightpdf)/2.0
             pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat,
                               imgdata, imgwidthpdf, imgheightpdf, imgxpdf,
-                              imgypdf, pagewidth, pageheight)
+                              imgypdf, pagewidth, pageheight, userunit)
 
     if kwargs['outputstream']:
         pdf.tostream(kwargs['outputstream'])
diff --git a/src/tests/__init__.py b/src/tests/__init__.py
index e6f3508..833cfbd 100644
--- a/src/tests/__init__.py
+++ b/src/tests/__init__.py
@@ -26,6 +26,7 @@ psp = (504, 972) # --pagesize portrait
 isl = (756, 324)  # --imgsize landscape
 isp = (324, 756)  # --imgsize portrait
 border = (162, 270)  # --border
+poster = (97200, 50400)
 # there is no need to have test cases with the same images with inverted
 # orientation (landscape/portrait) because --pagesize and --imgsize are
 # already inverted
@@ -404,6 +405,8 @@ layout_test_cases = [
      (972, 504), (864, 432)),
     (psl, isl, border, f_enlarge, 1, (972, 504), (756, 252),  # 179
      (972, 504), (864, 432)),
+    (poster, None, None, f_fill, 0, (97200, 50400), (151200, 50400),
+     (97200, 50400), (100800, 50400)),
 ]
 
 