Implement /UserUnit scaling to support oversized PDFs

The original PDF specification supported a maximum page size of 200x200
inches, or 14400 PDF units per side. In PDF 1.6, rather than remove this
limitation, Adobe added the /UserUnit field for pages, which allows one to
specify the scaling that should be applied for user-facing numbers, while
keeping the internal limit of 14400 units.

Many real-world designs are larger than 200" in one direction. One
example is tractor-feed or rolled paper, which may be easier to scan in
one continuous run than to segment into pages.

/UserUnit is independent of the pixel size and resolution of the image.

/UserUnit can also indicate very small page sizes but this is not
implemented here.
This commit is contained in:
James R. Barlow 2017-07-28 13:52:33 -07:00 committed by Johannes 'josch' Schauer
parent b54617de19
commit a8269391e9
2 changed files with 33 additions and 4 deletions

View file

@ -367,7 +367,7 @@ class pdfdoc(object):
def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata,
imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth,
pageheight):
pageheight, userunit=None):
if self.with_pdfrw:
from pdfrw import PdfDict, PdfName, PdfObject
from pdfrw.py23_diffs import convert_load
@ -436,6 +436,11 @@ class pdfdoc(object):
page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight]
page[PdfName.Resources] = resources
page[PdfName.Contents] = content
if userunit is not None:
# /UserUnit requires PDF 1.6
if self.writer.version < '1.6':
self.writer.version = '1.6'
page[PdfName.UserUnit] = userunit
self.writer.addpage(page)
@ -984,6 +989,17 @@ def get_fixed_dpi_layout_fun(fixed_dpi):
return fixed_dpi_layout_fun
def find_scale(pagewidth, pageheight):
    """Return the power-of-ten factor needed to fit an oversized page.

    The result is the smallest power of 10 (10, 100, 1000, ...) by which
    the page dimensions have to be divided so that the longer side is no
    more than 14400 PDF units (= 200 inches), the PDF specification limit.

    pagewidth, pageheight -- page dimensions in PDF units; only the larger
    of the two influences the result.
    """
    import math

    # only the longer side decides how much the page must be shrunk
    longest_side = pageheight if pageheight > pagewidth else pagewidth
    # how many times larger than the 14400 unit limit the longer side is
    ratio_to_limit = longest_side / 14400.0
    # round the decimal order of magnitude up to the next whole exponent
    exponent = math.ceil(math.log10(ratio_to_limit))
    return 10 ** exponent
# given one or more input images, depending on outputstream, either return a
# string containing the whole PDF if outputstream is None or write the PDF
# data to the given file-like object and return None
@ -1001,7 +1017,8 @@ def convert(*images, **kwargs):
viewer_initial_page=None, viewer_magnification=None,
viewer_page_layout=None, viewer_fit_window=False,
viewer_center_window=False, viewer_fullscreen=False,
with_pdfrw=True, outputstream=None, first_frame_only=False)
with_pdfrw=True, outputstream=None, first_frame_only=False,
allow_oversized=True)
for kwname, default in _default_kwargs.items():
if kwname not in kwargs:
kwargs[kwname] = default
@ -1051,18 +1068,27 @@ def convert(*images, **kwargs):
rawdata, kwargs['colorspace'], kwargs['first_frame_only']):
pagewidth, pageheight, imgwidthpdf, imgheightpdf = \
kwargs['layout_fun'](imgwidthpx, imgheightpx, ndpi)
userunit = None
if pagewidth < 3.00 or pageheight < 3.00:
logging.warning("pdf width or height is below 3.00 - too "
"small for some viewers!")
elif pagewidth > 14400.0 or pageheight > 14400.0:
raise PdfTooLargeError(
if kwargs['allow_oversized']:
userunit = find_scale(pagewidth, pageheight)
pagewidth /= userunit
pageheight /= userunit
imgwidthpdf /= userunit
imgheightpdf /= userunit
else:
raise PdfTooLargeError(
"pdf width or height must not exceed 200 inches.")
# the image is always centered on the page
imgxpdf = (pagewidth - imgwidthpdf)/2.0
imgypdf = (pageheight - imgheightpdf)/2.0
pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat,
imgdata, imgwidthpdf, imgheightpdf, imgxpdf,
imgypdf, pagewidth, pageheight)
imgypdf, pagewidth, pageheight, userunit)
if kwargs['outputstream']:
pdf.tostream(kwargs['outputstream'])

View file

@ -26,6 +26,7 @@ psp = (504, 972) # --pagesize portrait
isl = (756, 324) # --imgsize landscape
isp = (324, 756) # --imgsize portrait
border = (162, 270) # --border
poster = (97200, 50400)
# there is no need to have test cases with the same images with inverted
# orientation (landscape/portrait) because --pagesize and --imgsize are
# already inverted
@ -404,6 +405,8 @@ layout_test_cases = [
(972, 504), (864, 432)),
(psl, isl, border, f_enlarge, 1, (972, 504), (756, 252), # 179
(972, 504), (864, 432)),
(poster, None, None, f_fill, 0, (97200, 50400), (151200, 50400),
(97200, 50400), (100800, 50400)),
]